網站建設合同 文庫怎么更改網站欄目id
鶴壁市浩天電氣有限公司
2026/01/24 07:05:53
網站建設合同 文庫,怎么更改網站欄目id,深圳網站公司推廣平臺,wordpress顯示代碼框
HDF5完整文件結構與操作指南
目錄
- 完整文件結構概覽
- 基礎數據集類型
- 組結構操作
- 屬性系統
- 高級數據類型
- 引用和鏈接
- 壓縮和分塊
- 可擴展數據集
- 維度標簽
- 完整示例代碼

1. 完整文件結構概覽
1.1 理想的HDF5文件結構
comprehensive_example.h5 # 根文件
│
├── metadata/                              # 元數據組
│   ├── attrs: {title, author, version}    # 組屬性
│   ├── description (string)               # 描述文本
│   ├── creation_date (datetime)           # 創建日期
│   └── parameters (structured)            # 參數結構體
│
├── raw_data/                              # 原始數據組
│   ├── sensor_1 (1D float32)              # 1維數據
│   ├── sensor_2 (1D float32)
│   ├── images (3D uint8)                  # 3維圖像數據
│   │   ├── attrs: {resolution, units}     # 數據集屬性
│   │   └── dims: [time, height, width]    # 維度標簽
│   └── measurements (2D float64)          # 2維測量數據
│
├── processed_data/                        # 處理后數據組
│   ├── filtered (2D compressed)           # 壓縮數據集
│   ├── normalized (2D chunked)            # 分塊數據集
│   └── statistics (compound type)         # 復合數據類型
│
├── models/                                # 模型組
│   ├── neural_network/                    # 神經網絡子組
│   │   ├── layer_1_weights (2D)
│   │   ├── layer_1_biases (1D)
│   │   ├── layer_2_weights (2D)
│   │   └── layer_2_biases (1D)
│   └── config/
│       └── hyperparameters (JSON)
│
├── time_series/                           # 時間序列數據
│   ├── data (resizable 2D)                # 可擴展數據集
│   ├── timestamps (1D datetime)
│   └── labels (1D categorical)
│
├── references/                            # 引用和鏈接
│   ├── link_to_raw -> /raw_data           # 軟鏈接
│   ├── external_link -> file.h5:/data     # 外部鏈接
│   └── object_references (refs)           # 對象引用
│
├── special_types/                         # 特殊數據類型
│   ├── string_array (variable length)     # 變長字符串
│   ├── boolean_mask (bool)                # 布爾類型
│   ├── enum_data (enum)                   # 枚舉類型
│   ├── complex_numbers (complex)          # 復數
│   └── nested_compound (nested)           # 嵌套結構體
│
└── large_data/                            # 大數據集
    ├── chunked_compressed (gzip)          # 分塊壓縮
    ├── lzf_compressed (lzf)               # LZF壓縮
    └── virtual_dataset (virtual)          # 虛擬數據集

2. 
基礎(chǔ)數(shù)據(jù)集類型2.1 數(shù)值類型數(shù)據(jù)集2.1.1 整數(shù)類型importh5pyimportnumpyasnpwithh5py.File(example.h5,w)asf:# 有符號(hào)整數(shù)f.create_dataset(int8_data,datanp.array([1,2,3],dtypenp.int8))f.create_dataset(int16_data,datanp.array([100,200],dtypenp.int16))f.create_dataset(int32_data,datanp.array([1000,2000],dtypenp.int32))f.create_dataset(int64_data,datanp.array([10000,20000],dtypenp.int64))# 無(wú)符號(hào)整數(shù)f.create_dataset(uint8_data,datanp.array([255,128],dtypenp.uint8))f.create_dataset(uint16_data,datanp.array([65535],dtypenp.uint16))f.create_dataset(uint32_data,datanp.array([4294967295],dtypenp.uint32))f.create_dataset(uint64_data,datanp.array([2**63],dtypenp.uint64))# 讀取操作withh5py.File(example.h5,r)asf:int8_dataf[int8_data][:]print(f數(shù)據(jù)類型:{int8_data.dtype})print(f數(shù)據(jù):{int8_data})2.1.2 浮點(diǎn)類型withh5py.File(example.h5,w)asf:# 單精度浮點(diǎn)f.create_dataset(float32_data,datanp.array([3.14,2.71],dtypenp.float32))# 雙精度浮點(diǎn)f.create_dataset(float64_data,datanp.array([3.141592653589793],dtypenp.float64))# 半精度浮點(diǎn)節(jié)省空間f.create_dataset(float16_data,datanp.array([1.5,2.5],dtypenp.float16))# 讀取并查看精度withh5py.File(example.h5,r)asf:fornamein[float32_data,float64_data,float16_data]:dataf[name][:]print(f{name}: dtype{data.dtype}, precision{data.itemsize*8}bits)2.1.3 復(fù)數(shù)類型withh5py.File(example.h5,w)asf:# 復(fù)數(shù)類型complex_datanp.array([12j,34j,56j],dtypenp.complex64)f.create_dataset(complex64,datacomplex_data)# 雙精度復(fù)數(shù)complex_data_highnp.array([12j,34j],dtypenp.complex128)f.create_dataset(complex128,datacomplex_data_high)# 讀取和處理復(fù)數(shù)withh5py.File(example.h5,r)asf:c_dataf[complex64][:]print(f實(shí)部:{c_data.real})print(f虛部:{c_data.imag})print(f模:{np.abs(c_data)})print(f相位:{np.angle(c_data)})2.1.4 布爾類型withh5py.File(example.h5,w)asf:# 布爾數(shù)組bool_datanp.array([True,False,True,True],dtypebool)f.create_dataset(boolean_mask,databool_data)# 
布爾矩陣用于掩碼bool_matrixnp.random.rand(100,100)0.5f.create_dataset(random_mask,databool_matrix)# 讀取和應(yīng)用掩碼withh5py.File(example.h5,r)asf:maskf[random_mask][:]# 可以用于過(guò)濾數(shù)據(jù)datanp.random.randn(100,100)filtered_datadata[mask]print(f掩碼選中了{(lán)mask.sum()}個(gè)元素)2.2 字符串類型數(shù)據(jù)集2.2.1 固定長(zhǎng)度字符串withh5py.File(example.h5,w)asf:# 固定長(zhǎng)度ASCII字符串fixed_stringsnp.array([bhello,bworld,btest],dtypeS10)f.create_dataset(fixed_ascii,datafixed_strings)# 固定長(zhǎng)度Unicode字符串fixed_unicodenp.array([你好,世界,測(cè)試],dtypeU10)f.create_dataset(fixed_unicode,datafixed_unicode)# 讀取字符串withh5py.File(example.h5,r)asf:ascii_dataf[fixed_ascii][:]unicode_dataf[fixed_unicode][:]print(fASCII:{ascii_data})print(fUnicode:{unicode_data})2.2.2 變長(zhǎng)字符串withh5py.File(example.h5,w)asf:# 變長(zhǎng)ASCII字符串dt_asciih5py.string_dtype(encodingascii)var_strings[short,a very long string,medium]f.create_dataset(variable_ascii,datavar_strings,dtypedt_ascii)# 變長(zhǎng)UTF-8字符串dt_utf8h5py.string_dtype(encodingutf-8)var_unicode[短,這是一個(gè)很長(zhǎng)的中文字符串,中等長(zhǎng)度]f.create_dataset(variable_utf8,datavar_unicode,dtypedt_utf8)# 讀取變長(zhǎng)字符串withh5py.File(example.h5,r)asf:var_asciif[variable_ascii][:]var_utf8f[variable_utf8][:]print(f變長(zhǎng)ASCII:{var_ascii})print(f變長(zhǎng)UTF-8:{var_utf8})# 單個(gè)元素訪問(wèn)print(f第一個(gè)元素:{f[variable_utf8][0]})2.3 多維數(shù)組2.3.1 一維數(shù)組向量withh5py.File(example.h5,w)asf:# 時(shí)間序列數(shù)據(jù)time_seriesnp.sin(np.linspace(0,10*np.pi,1000))dsetf.create_dataset(time_series,datatime_series)dset.attrs[description]Sine wavedset.attrs[sampling_rate]100# Hz# 讀取和處理withh5py.File(example.h5,r)asf:tsf[time_series]print(f形狀:{ts.shape})print(f長(zhǎng)度:{len(ts)})print(f采樣率:{ts.attrs[sampling_rate]}Hz)# 切片讀取first_100ts[:100]last_100ts[-100:]2.3.2 二維數(shù)組矩陣withh5py.File(example.h5,w)asf:# 圖像數(shù)據(jù)灰度圖imagenp.random.randint(0,256,(512,512),dtypenp.uint8)dsetf.create_dataset(grayscale_image,dataimage)dset.attrs[height]512dset.attrs[width]512dset.attrs[channels]1# 
表格數(shù)據(jù)table_datanp.random.randn(1000,50)# 1000行50列f.create_dataset(table_data,datatable_data)# 讀取操作withh5py.File(example.h5,r)asf:# 讀取整個(gè)圖像imgf[grayscale_image][:]# 讀取圖像的一部分ROIroif[grayscale_image][100:200,100:200]# 讀取表格的特定行列col_5f[table_data][:,5]# 第5列row_10f[table_data][10,:]# 第10行subsetf[table_data][0:100,0:10]# 子集2.3.3 三維數(shù)組體數(shù)據(jù)withh5py.File(example.h5,w)asf:# RGB圖像序列videonp.random.randint(0,256,(100,480,640,3),dtypenp.uint8)dsetf.create_dataset(video_rgb,datavideo)dset.attrs[num_frames]100dset.attrs[height]480dset.attrs[width]640dset.attrs[channels]3dset.attrs[fps]30# 3D醫(yī)學(xué)圖像CT掃描ct_scannp.random.randn(256,256,128)# [x, y, z]f.create_dataset(ct_scan,datact_scan)# 讀取操作withh5py.File(example.h5,r)asf:# 讀取特定幀frame_10f[video_rgb][10,:,:,:]# 讀取時(shí)間切片time_slicef[video_rgb][0:50:5,:,:,:]# 每5幀取一幀# 讀取空間切片spatial_slicef[video_rgb][:,100:200,200:300,:]2.3.4 四維及更高維數(shù)組withh5py.File(example.h5,w)asf:# 4D: [batch, height, width, channels]batch_imagesnp.random.randn(32,224,224,3)f.create_dataset(image_batch,databatch_images)# 5D: [time, batch, height, width, channels]video_batchnp.random.randn(10,16,64,64,3)f.create_dataset(video_batch,datavideo_batch)# 讀取操作withh5py.File(example.h5,r)asf:# 讀取特定批次batch_0f[image_batch][0,:,:,:]# 復(fù)雜切片subsetf[video_batch][0:5,0:8,::2,::2,:]# 降采樣3. 
組結(jié)構(gòu)操作3.1 創(chuàng)建和組織組withh5py.File(example.h5,w)asf:# 方法1: 直接創(chuàng)建組group1f.create_group(level1)# 方法2: 創(chuàng)建嵌套組group2f.create_group(level1/level2)# 方法3: 使用require_group存在則返回不存在則創(chuàng)建group3f.require_group(level1/level2/level3)# 在組中創(chuàng)建數(shù)據(jù)集group1.create_dataset(data1,datanp.arange(10))group2.create_dataset(data2,datanp.arange(20))group3.create_dataset(data3,datanp.arange(30))# 創(chuàng)建多個(gè)平行組foriinrange(5):groupf.create_group(fexperiment_{i})group.create_dataset(results,datanp.random.randn(100))group.attrs[experiment_id]i group.attrs[timestamp]f2024-01-{i1:02d}3.2 遍歷組結(jié)構(gòu)defprint_structure(name,obj):遞歸打印HDF5結(jié)構(gòu)indent *name.count(/)ifisinstance(obj,h5py.Group):print(f{indent}{name}/)elifisinstance(obj,h5py.Dataset):print(f{indent}{name}{obj.shape}{obj.dtype})withh5py.File(example.h5,r)asf:print(方法1: visititems)f.visititems(print_structure)print(
方法2: 手動(dòng)遍歷)defrecursive_print(group,level0):forkeyingroup.keys():itemgroup[key]indent *levelifisinstance(item,h5py.Group):print(f{indent}{key}/)recursive_print(item,level1)else:print(f{indent}{key}{item.shape})recursive_print(f)print(
方法3: 只遍歷特定組)forkeyinf[level1].keys():print(fFound:{key})3.3 移動(dòng)、復(fù)制和刪除組withh5py.File(example.h5,a)asf:# 復(fù)制組f.copy(level1,level1_copy)# 移動(dòng)組重命名f.move(level1_copy,level1_backup)# 刪除組delf[level1_backup]# 復(fù)制到另一個(gè)文件withh5py.File(destination.h5,w)asf_dest:f.copy(level1,f_dest,nameimported_data)3.4 按條件查找數(shù)據(jù)集deffind_datasets(group,condition):查找滿足條件的數(shù)據(jù)集results[]defsearch(name,obj):ifisinstance(obj,h5py.Dataset):ifcondition(name,obj):results.append(name)group.visititems(search)returnresultswithh5py.File(example.h5,r)asf:# 查找所有浮點(diǎn)數(shù)據(jù)集float_datasetsfind_datasets(f,lambdaname,obj:obj.dtype.kindf)# 查找大于特定大小的數(shù)據(jù)集large_datasetsfind_datasets(f,lambdaname,obj:obj.size1000)# 查找包含特定屬性的數(shù)據(jù)集with_attrfind_datasets(f,lambdaname,obj:experiment_idinobj.attrs)print(f浮點(diǎn)數(shù)據(jù)集:{float_datasets})print(f大數(shù)據(jù)集:{large_datasets})print(f有experiment_id屬性:{with_attr})4. 屬性系統(tǒng)4.1 文件級(jí)屬性withh5py.File(example.h5,w)asf:# 字符串屬性f.attrs[title]My Research Dataf.attrs[author]Josh Wittf.attrs[institution]University# 數(shù)值屬性f.attrs[version]1.0f.attrs[year]2024# 數(shù)組屬性f.attrs[dimensions][1024,768]f.attrs[channels][0,1,2]# 日期時(shí)間存儲(chǔ)為字符串fromdatetimeimportdatetime f.attrs[created]datetime.now().isoformat()# 布爾屬性f.attrs[is_validated]Truef.attrs[is_published]False# 讀取文件屬性withh5py.File(example.h5,r)asf:print(文件屬性:)forkey,valueinf.attrs.items():print(f{key}:{value})4.2 組級(jí)屬性withh5py.File(example.h5,w)asf:# 為不同的實(shí)驗(yàn)組添加元數(shù)據(jù)forexp_idinrange(3):groupf.create_group(fexperiment_{exp_id})# 實(shí)驗(yàn)參數(shù)group.attrs[temperature]20exp_id*5# ℃group.attrs[pressure]1.0exp_id*0.1# atmgroup.attrs[duration]3600# seconds# 實(shí)驗(yàn)狀態(tài)group.attrs[status]completedgroup.attrs[quality_score]0.95# 參考信息group.attrs[reference_paper]Smith et al., 2024group.attrs[doi]f10.1234/journal.{exp_id}# 讀取和篩選withh5py.File(example.h5,r)asf:# 
找出所有高溫實(shí)驗(yàn)high_temp_exps[]forkeyinf.keys():iff[key].attrs.get(temperature,0)25:high_temp_exps.append(key)print(f高溫實(shí)驗(yàn):{high_temp_exps})4.3 數(shù)據(jù)集級(jí)屬性withh5py.File(example.h5,w)asf:# 創(chuàng)建數(shù)據(jù)集并添加詳細(xì)元數(shù)據(jù)datanp.random.randn(1000,100)dsetf.create_dataset(measurements,datadata)# 物理單位和量程dset.attrs[units]meters per seconddset.attrs[range][data.min(),data.max()]dset.attrs[mean]data.mean()dset.attrs[std]data.std()# 采集信息dset.attrs[sampling_rate]1000# Hzdset.attrs[num_channels]100dset.attrs[calibration_factor]1.05# 質(zhì)量控制dset.attrs[outliers_removed]5dset.attrs[missing_values]0dset.attrs[validated]True# 處理歷史dset.attrs[preprocessing]bandpass filter 0.1-100 Hzdset.attrs[detrended]True# 使用屬性進(jìn)行數(shù)據(jù)處理withh5py.File(example.h5,r)asf:dsetf[measurements]# 根據(jù)屬性校準(zhǔn)數(shù)據(jù)calibrationdset.attrs[calibration_factor]datadset[:]*calibration# 顯示統(tǒng)計(jì)信息print(f單位:{dset.attrs[units]})print(f均值:{dset.attrs[mean]:.2f})print(f標(biāo)準(zhǔn)差:{dset.attrs[std]:.2f})4.4 修改和刪除屬性withh5py.File(example.h5,a)asf:dsetf[measurements]# 修改現(xiàn)有屬性dset.attrs[version]2.0# 添加新屬性dset.attrs[last_modified]datetime.now().isoformat()# 刪除屬性iftemporaryindset.attrs:deldset.attrs[temporary]# 批量更新屬性new_attrs{processed:True,algorithm:FFT,window:Hamming}dset.attrs.update(new_attrs)5. 
高級(jí)數(shù)據(jù)類型5.1 復(fù)合數(shù)據(jù)類型結(jié)構(gòu)體# 定義復(fù)合數(shù)據(jù)類型dtnp.dtype([(name,S50),# 固定長(zhǎng)度字符串(age,i4),# 32位整數(shù)(height,f4),# 32位浮點(diǎn)(weight,f4),(is_active,?),# 布爾(scores,f4,(3,))# 固定長(zhǎng)度數(shù)組])withh5py.File(example.h5,w)asf:# 創(chuàng)建結(jié)構(gòu)化數(shù)據(jù)集datanp.array([(bAlice,25,165.5,55.2,True,[90,85,88]),(bBob,30,175.0,70.5,True,[78,82,80]),(bCharlie,28,180.2,75.0,False,[95,92,89])],dtypedt)dsetf.create_dataset(people,datadata)dset.attrs[description]Personnel records# 讀取和訪問(wèn)withh5py.File(example.h5,r)asf:dataf[people][:]# 訪問(wèn)特定字段namesdata[name]agesdata[age]scoresdata[scores]# 訪問(wèn)特定記錄first_persondata[0]print(f第一個(gè)人:{first_person[name]}, 年齡:{first_person[age]})# 篩選數(shù)據(jù)active_peopledata[data[is_active]]adultsdata[data[age]18]5.2 嵌套復(fù)合類型# 定義嵌套結(jié)構(gòu)address_dtypenp.dtype([(street,S100),(city,S50),(zipcode,i4)])person_dtypenp.dtype([(id,i4),(name,S50),(address,address_dtype),# 嵌套結(jié)構(gòu)(salary,f8)])withh5py.File(example.h5,w)asf:datanp.array([(1,bAlice,(b123 Main St,bBoston,12345),75000.0),(2,bBob,(b456 Oak Ave,bNYC,10001),85000.0)],dtypeperson_dtype)f.create_dataset(employees,datadata)# 讀取嵌套數(shù)據(jù)withh5py.File(example.h5,r)asf:dataf[employees][:]# 訪問(wèn)嵌套字段citiesdata[address][city]zipcodesdata[address][zipcode]print(f員工城市:{cities})print(f郵編:{zipcodes})5.3 枚舉類型# 創(chuàng)建枚舉類型status_enumh5py.enum_dtype({PENDING:0,RUNNING:1,COMPLETED:2,FAILED:3},basetypei)withh5py.File(example.h5,w)asf:# 使用枚舉類型statusesnp.array([0,1,2,1,2,3],dtypestatus_enum)dsetf.create_dataset(task_status,datastatuses)# 讀取枚舉withh5py.File(example.h5,r)asf:statusesf[task_status][:]# 統(tǒng)計(jì)各狀態(tài)數(shù)量unique,countsnp.unique(statuses,return_countsTrue)forval,countinzip(unique,counts):print(f狀態(tài){val}:{count}個(gè))5.4 變長(zhǎng)數(shù)據(jù)類型withh5py.File(example.h5,w)asf:# 
變長(zhǎng)整數(shù)數(shù)組vlen_inth5py.vlen_dtype(np.dtype(int32))datanp.array([[1,2,3],[4,5],[6,7,8,9,10]],dtypeobject)f.create_dataset(variable_length_arrays,datadata,dtypevlen_int)# 變長(zhǎng)浮點(diǎn)數(shù)組vlen_floath5py.vlen_dtype(np.dtype(float64))ragged_datanp.array([[1.1,2.2],[3.3,4.4,5.5,6.6],[7.7]],dtypeobject)f.create_dataset(ragged_arrays,dataragged_data,dtypevlen_float)# 讀取變長(zhǎng)數(shù)據(jù)withh5py.File(example.h5,r)asf:vlen_dataf[variable_length_arrays][:]fori,arrinenumerate(vlen_data):print(f行{i}: 長(zhǎng)度{len(arr)}, 數(shù)據(jù){arr})6. 引用和鏈接6.1 軟鏈接Soft Linkswithh5py.File(example.h5,w)asf:# 創(chuàng)建原始數(shù)據(jù)datanp.arange(100)f.create_dataset(data/original,datadata)# 創(chuàng)建軟鏈接f[link_to_original]h5py.SoftLink(/data/original)# 在其他組中創(chuàng)建鏈接f.create_group(analysis)f[analysis/data_link]h5py.SoftLink(/data/original)# 使用軟鏈接withh5py.File(example.h5,r)asf:# 通過(guò)鏈接訪問(wèn)數(shù)據(jù)data_via_linkf[link_to_original][:]data_originalf[data/original][:]# 驗(yàn)證是否指向同一數(shù)據(jù)print(f數(shù)據(jù)相同:{np.array_equal(data_via_link,data_original)})6.2 硬鏈接Hard Linkswithh5py.File(example.h5,w)asf:# 創(chuàng)建數(shù)據(jù)集datanp.random.randn(100)dsetf.create_dataset(original_data,datadata)# 創(chuàng)建硬鏈接多個(gè)名稱指向同一對(duì)象f[copy1]dset# 硬鏈接f[copy2]dset# 另一個(gè)硬鏈接# 刪除原始名稱數(shù)據(jù)仍然存在delf[original_data]# 通過(guò)其他名稱仍可訪問(wèn)data_via_copyf[copy1][:]6.3 外部鏈接External Links# 創(chuàng)建源文件withh5py.File(source.h5,w)asf:f.create_dataset(external_data,datanp.arange(1000))# 創(chuàng)建帶外部鏈接的文件withh5py.File(main.h5,w)asf:# 鏈接到另一個(gè)文件的數(shù)據(jù)集f[linked_data]h5py.ExternalLink(source.h5,/external_data)# 鏈接到另一個(gè)文件的組f[linked_group]h5py.ExternalLink(source.h5,/)# 使用外部鏈接withh5py.File(main.h5,r)asf:# 自動(dòng)訪問(wèn)外部文件的數(shù)據(jù)dataf[linked_data][:]print(f從外部文件讀取的數(shù)據(jù):{data[:10]})6.4 對(duì)象引用Object Referenceswithh5py.File(example.h5,w)asf:# 
創(chuàng)建多個(gè)數(shù)據(jù)集dset1f.create_dataset(dataset_1,datanp.arange(10))dset2f.create_dataset(dataset_2,datanp.arange(20))dset3f.create_dataset(dataset_3,datanp.arange(30))# 創(chuàng)建對(duì)象引用數(shù)組ref_dtypeh5py.ref_dtype refsnp.array([dset1.ref,dset2.ref,dset3.ref],dtyperef_dtype)f.create_dataset(dataset_references,datarefs)# 使用對(duì)象引用withh5py.File(example.h5,r)asf:refsf[dataset_references][:]# 通過(guò)引用訪問(wèn)對(duì)象fori,refinenumerate(refs):dsetf[ref]print(f引用{i}指向:{dset.name}, 形狀:{dset.shape})datadset[:]print(f 數(shù)據(jù):{data})6.5 區(qū)域引用Region Referenceswithh5py.File(example.h5,w)asf:# 創(chuàng)建一個(gè)大數(shù)據(jù)集datanp.arange(1000).reshape(100,10)dsetf.create_dataset(large_dataset,datadata)# 創(chuàng)建區(qū)域引用# 引用特定的行region1dset.regionref[0:10,:]# 前10行region2dset.regionref[50:60,:]# 中間10行# 引用特定的列region3dset.regionref[:,0:5]# 前5列# 引用特定的矩形區(qū)域region4dset.regionref[20:30,3:7]# 子矩陣# 存儲(chǔ)區(qū)域引用ref_dtypeh5py.regionref_dtype regionsnp.array([region1,region2,region3,region4],dtyperef_dtype)f.create_dataset(regions,dataregions)# 使用區(qū)域引用withh5py.File(example.h5,r)asf:dsetf[large_dataset]regionsf[regions][:]fori,regioninenumerate(regions):# 通過(guò)區(qū)域引用讀取數(shù)據(jù)region_datadset[region]print(f區(qū)域{i}: 形狀 {region_data.shape})print(f 數(shù)據(jù)樣本:{region_data.ravel()[:5]})7. 
壓縮和分塊7.1 壓縮方法對(duì)比importtime# 創(chuàng)建測(cè)試數(shù)據(jù)test_datanp.random.randn(10000,1000).astype(float32)withh5py.File(compression_test.h5,w)asf:# 無(wú)壓縮starttime.time()f.create_dataset(no_compression,datatest_data)time_no_comptime.time()-start# GZIP壓縮級(jí)別1-9forlevelin[1,4,9]:starttime.time()f.create_dataset(fgzip_level_{level},datatest_data,compressiongzip,compression_optslevel)time_gziptime.time()-startprint(fGZIP級(jí)別{level}:{time_gzip:.2f}秒)# LZF壓縮starttime.time()f.create_dataset(lzf_compression,datatest_data,compressionlzf)time_lzftime.time()-startprint(fLZF:{time_lzf:.2f}秒)# SZIP壓縮需要特殊編譯的HDF5try:f.create_dataset(szip_compression,datatest_data,compressionszip,compression_opts(nn,16))except:print(SZIP不可用)# 比較文件大小和讀取速度importoswithh5py.File(compression_test.h5,r)asf:fornameinf.keys():dsetf[name]# 讀取速度測(cè)試starttime.time()_dset[:]read_timetime.time()-start# 獲取存儲(chǔ)大小storage_sizedset.id.get_storage_size()print(f{name}:)print(f 存儲(chǔ)大小:{storage_size/1024/1024:.2f}MB)print(f 讀取時(shí)間:{read_time:.3f}秒)7.2 分塊策略withh5py.File(chunking_test.h5,w)asf:datanp.random.randn(10000,10000).astype(float32)# 自動(dòng)分塊f.create_dataset(auto_chunks,datadata,chunksTrue)# 按行分塊適合行遍歷f.create_dataset(row_chunks,datadata,chunks(100,10000))# 100行一塊# 按列分塊適合列遍歷f.create_dataset(col_chunks,datadata,chunks(10000,100))# 100列一塊# 方塊分塊適合隨機(jī)訪問(wèn)f.create_dataset(square_chunks,datadata,chunks(1000,1000))# 1000x1000的塊# 小塊分塊f.create_dataset(small_chunks,datadata,chunks(10,10))# 測(cè)試不同訪問(wèn)模式的性能withh5py.File(chunking_test.h5,r)asf:fornamein[row_chunks,col_chunks,square_chunks]:dsetf[name]# 行訪問(wèn)測(cè)試starttime.time()foriinrange(0,10000,1000):_dset[i,:]row_timetime.time()-start# 列訪問(wèn)測(cè)試starttime.time()forjinrange(0,10000,1000):_dset[:,j]col_timetime.time()-startprint(f{name}:)print(f 行訪問(wèn):{row_time:.2f}秒)print(f 列訪問(wèn):{col_time:.2f}秒)7.3 最優(yōu)分塊大小計(jì)算defcalculate_optimal_chunk_size(shape,dtype,target_chunk_size_mb1): 計(jì)算最優(yōu)分塊大小 參數(shù): shape: 
數(shù)據(jù)集形狀 dtype: 數(shù)據(jù)類型 target_chunk_size_mb: 目標(biāo)塊大小MB element_sizenp.dtype(dtype).itemsize target_elements(target_chunk_size_mb*1024*1024)/element_size# 嘗試保持原始形狀的比例ndimlen(shape)chunk_shapelist(shape)total_elementsnp.prod(shape)iftotal_elementstarget_elements:returntuple(shape)# 縮小各維度scale(target_elements/total_elements)**(1/ndim)chunk_shape[max(1,int(dim*scale))fordiminshape]returntuple(chunk_shape)# 使用示例shape(10000,5000,3)dtypenp.float32 optimal_chunkscalculate_optimal_chunk_size(shape,dtype)print(f推薦的塊大小:{optimal_chunks})withh5py.File(optimal_chunks.h5,w)asf:datanp.random.randn(*shape).astype(dtype)f.create_dataset(data,datadata,chunksoptimal_chunks,compressiongzip,compression_opts4)7.4 Shuffle過(guò)濾器withh5py.File(shuffle_test.h5,w)asf:# 創(chuàng)建具有相關(guān)性的數(shù)據(jù)更容易壓縮datanp.arange(100000,dtypefloat32).reshape(1000,100)datanp.random.randn(1000,100)*0.1# 不使用shufflef.create_dataset(without_shuffle,datadata,compressiongzip,compression_opts9,shuffleFalse)# 使用shuffle通常能提高壓縮率f.create_dataset(with_shuffle,datadata,compressiongzip,compression_opts9,shuffleTrue)# 比較壓縮效果withh5py.File(shuffle_test.h5,r)asf:size_withoutf[without_shuffle].id.get_storage_size()size_withf[with_shuffle].id.get_storage_size()print(f不使用shuffle:{size_without/1024:.2f}KB)print(f使用shuffle:{size_with/1024:.2f}KB)print(f壓縮率提升:{(1-size_with/size_without)*100:.1f}%)8. 
可擴(kuò)展數(shù)據(jù)集8.1 一維可擴(kuò)展數(shù)據(jù)集withh5py.File(resizable.h5,w)asf:# 創(chuàng)建可擴(kuò)展數(shù)據(jù)集dsetf.create_dataset(expandable_1d,shape(100,),maxshape(None,),# 可無(wú)限擴(kuò)展dtypefloat32,chunks(100,))# 初始數(shù)據(jù)dset[:]np.random.randn(100)# 追加數(shù)據(jù)withh5py.File(resizable.h5,a)asf:dsetf[expandable_1d]# 擴(kuò)展數(shù)據(jù)集old_sizedset.shape[0]new_datanp.random.randn(50)dset.resize(old_size50,axis0)dset[old_size:]new_dataprint(f新大小:{dset.shape})8.2 多維可擴(kuò)展數(shù)據(jù)集withh5py.File(resizable.h5,w)asf:# 創(chuàng)建2D可擴(kuò)展數(shù)據(jù)集dsetf.create_dataset(expandable_2d,shape(100,50),maxshape(None,50),# 只在第一維可擴(kuò)展dtypefloat32,chunks(10,50))dset[:]np.random.randn(100,50)# 追加行withh5py.File(resizable.h5,a)asf:dsetf[expandable_2d]old_rowsdset.shape[0]new_rows20dset.resize(old_rowsnew_rows,axis0)dset[old_rows:,:]np.random.randn(new_rows,50)8.3 流式數(shù)據(jù)寫(xiě)入defstream_data_writer(filename,chunk_size1000): 模擬流式數(shù)據(jù)寫(xiě)入 withh5py.File(filename,w)asf:# 創(chuàng)建可擴(kuò)展數(shù)據(jù)集dsetf.create_dataset(streaming_data,shape(0,100),maxshape(None,100),chunks(chunk_size,100),dtypefloat32)# 模擬連續(xù)數(shù)據(jù)流foriinrange(10):# 10批數(shù)據(jù)# 生成新數(shù)據(jù)new_datanp.random.randn(chunk_size,100)# 擴(kuò)展并寫(xiě)入old_sizedset.shape[0]dset.resize(old_sizechunk_size,axis0)dset[old_size:,:]new_dataprint(f批次{i1}: 累計(jì)大小 {dset.shape})# 使用stream_data_writer(streaming.h5)8.4 時(shí)間序列數(shù)據(jù)追加fromdatetimeimportdatetime,timedeltawithh5py.File(timeseries.h5,w)asf:# 創(chuàng)建時(shí)間戳數(shù)據(jù)集dt_typeh5py.string_dtype(encodingutf-8)timestampsf.create_dataset(timestamps,shape(0,),maxshape(None,),dtypedt_type,chunks(1000,))# 創(chuàng)建數(shù)值數(shù)據(jù)集valuesf.create_dataset(values,shape(0,10),maxshape(None,10),chunks(1000,10),dtypefloat32)# 初始化start_timedatetime.now()foriinrange(5):# 生成新時(shí)間戳current_timestart_timetimedelta(secondsi)timestamp_strcurrent_time.isoformat()# 生成新數(shù)據(jù)new_valuenp.random.randn(1,10)# 
追加old_sizevalues.shape[0]timestamps.resize(old_size1,axis0)values.resize(old_size1,axis0)timestamps[old_size]timestamp_str values[old_size,:]new_value# 讀取時(shí)間序列withh5py.File(timeseries.h5,r)asf:tsf[timestamps][:]valsf[values][:]fort,vinzip(ts[:5],vals[:5]):print(f{t}:{v})9. 維度標(biāo)簽9.1 創(chuàng)建維度標(biāo)簽withh5py.File(dimensions.h5,w)asf:# 創(chuàng)建數(shù)據(jù)集datanp.random.randn(100,64,64,3)dsetf.create_dataset(video,datadata)# 創(chuàng)建維度標(biāo)簽數(shù)據(jù)集# 維度0: 時(shí)間time_scalef.create_dataset(time,datanp.arange(100))time_scale.attrs[units]frames# 維度1和2: 空間坐標(biāo)y_coordsf.create_dataset(y_coords,datanp.arange(64))x_coordsf.create_dataset(x_coords,datanp.arange(64))# 維度3: 顏色通道channelsf.create_dataset(channels,data[bR,bG,bB])# 附加維度標(biāo)簽dset.dims[0].labeltimedset.dims[1].labelydset.dims[2].labelxdset.dims[3].labelchannel# 附加維度標(biāo)度dimension scalesdset.dims[0].attach_scale(time_scale)dset.dims[1].attach_scale(y_coords)dset.dims[2].attach_scale(x_coords)dset.dims[3].attach_scale(channels)# 讀取維度信息withh5py.File(dimensions.h5,r)asf:dsetf[video]print(維度信息:)fori,diminenumerate(dset.dims):print(f 維度{i}:{dim.label})# 獲取維度標(biāo)度iflen(dim)0:scaledim[0]print(f 標(biāo)度:{scale.name})print(f 值:{scale[:5]}...)# 顯示前5個(gè)9.2 多個(gè)維度標(biāo)度withh5py.File(multi_scale.h5,w)asf:# 創(chuàng)建數(shù)據(jù)集datanp.random.randn(1000,100)dsetf.create_dataset(measurements,datadata)# 為第一維創(chuàng)建多個(gè)標(biāo)度# 標(biāo)度1: 采樣點(diǎn)索引indicesf.create_dataset(sample_indices,datanp.arange(1000))# 標(biāo)度2: 時(shí)間秒time_secondsf.create_dataset(time_seconds,datanp.arange(1000)*0.001)# 標(biāo)度3: 時(shí)間戳dt_typeh5py.string_dtype(encodingutf-8)timestamps[]start_timedatetime(2024,1,1,0,0,0)foriinrange(1000):tsstart_timetimedelta(millisecondsi)timestamps.append(ts.isoformat())f.create_dataset(timestamps,datatimestamps,dtypedt_type)# 附加所有標(biāo)度dset.dims[0].attach_scale(indices)dset.dims[0].attach_scale(time_seconds)dset.dims[0].attach_scale(f[timestamps])# 
為第二維創(chuàng)建標(biāo)度channel_names[fChannel_{i}.encode()foriinrange(100)]channelsf.create_dataset(channel_names,datachannel_names)dset.dims[1].attach_scale(channels)# 使用維度標(biāo)度withh5py.File(multi_scale.h5,r)asf:dsetf[measurements]print(第一維的標(biāo)度:)forscaleindset.dims[0]:print(f{scale.name}:{scale[:3]}...)10. 完整示例代碼10.1 創(chuàng)建綜合示例文件 創(chuàng)建一個(gè)包含所有HDF5特性的綜合示例文件 importh5pyimportnumpyasnpfromdatetimeimportdatetimedefcreate_comprehensive_h5(filenamecomprehensive.h5):withh5py.File(filename,w)asf:# # 1. 文件級(jí)元數(shù)據(jù)# f.attrs[title]Comprehensive HDF5 Examplef.attrs[author]Josh Wittf.attrs[created]datetime.now().isoformat()f.attrs[version]1.0f.attrs[description]Contains all HDF5 data types and features# # 2. 基礎(chǔ)數(shù)值數(shù)據(jù)# basic_groupf.create_group(basic_types)# 各種數(shù)值類型basic_group.create_dataset(int32,datanp.arange(100,dtypei4))basic_group.create_dataset(float64,datanp.random.randn(100))basic_group.create_dataset(complex128,datanp.random.randn(50)1j*np.random.randn(50))basic_group.create_dataset(bool,datanp.random.rand(100)0.5)# 多維數(shù)組basic_group.create_dataset(matrix_2d,datanp.random.randn(100,50))basic_group.create_dataset(tensor_3d,datanp.random.randn(10,20,30))basic_group.create_dataset(tensor_4d,datanp.random.randn(5,10,20,3))# # 3. 字符串?dāng)?shù)據(jù)# string_groupf.create_group(strings)# 固定長(zhǎng)度string_group.create_dataset(fixed_ascii,datanp.array([bhello,bworld],dtypeS10))# 變長(zhǎng)字符串vlen_strh5py.string_dtype(encodingutf-8)string_group.create_dataset(variable_utf8,data[短,這是一個(gè)很長(zhǎng)的字符串,中],dtypevlen_str)# # 4. 
復(fù)合數(shù)據(jù)類型# compound_groupf.create_group(compound_types)# 簡(jiǎn)單結(jié)構(gòu)體person_dtnp.dtype([(name,S50),(age,i4),(salary,f8)])person_datanp.array([(bAlice,25,75000.0),(bBob,30,85000.0),(bCharlie,28,80000.0)],dtypeperson_dt)compound_group.create_dataset(people,dataperson_data)# 嵌套結(jié)構(gòu)體nested_dtnp.dtype([(id,i4),(measurements,f4,(5,)),# 固定長(zhǎng)度數(shù)組(valid,?)])nested_datanp.array([(1,[1.1,2.2,3.3,4.4,5.5],True),(2,[6.6,7.7,8.8,9.9,10.0],False)],dtypenested_dt)compound_group.create_dataset(nested,datanested_data)# # 5. 壓縮數(shù)據(jù)# compression_groupf.create_group(compressed)test_datanp.random.randn(1000,1000).astype(float32)# 不同壓縮方法compression_group.create_dataset(gzip_level_1,datatest_data,compressiongzip,compression_opts1)compression_group.create_dataset(gzip_level_9,datatest_data,compressiongzip,compression_opts9,shuffleTrue)compression_group.create_dataset(lzf,datatest_data,compressionlzf)# # 6. 可擴(kuò)展數(shù)據(jù)集# expandable_groupf.create_group(expandable)# 1D可擴(kuò)展exp_1dexpandable_group.create_dataset(data_1d,shape(100,),maxshape(None,),chunks(100,),dtypefloat32)exp_1d[:]np.random.randn(100)# 2D可擴(kuò)展exp_2dexpandable_group.create_dataset(data_2d,shape(100,50),maxshape(None,50),chunks(100,50),dtypefloat32)exp_2d[:]np.random.randn(100,50)# # 7. 引用和鏈接# reference_groupf.create_group(references)# 創(chuàng)建被引用的數(shù)據(jù)target_datanp.arange(100)targetf.create_dataset(target_dataset,datatarget_data)# 軟鏈接reference_group[soft_link]h5py.SoftLink(/target_dataset)# 對(duì)象引用reftarget.ref reference_group.create_dataset(object_ref,dataref)# # 8. 
屬性示例# attr_groupf.create_group(attributes_example)# 數(shù)據(jù)集with豐富的屬性sensor_datanp.random.randn(1000,10)sensor_dsetattr_group.create_dataset(sensor_readings,datasensor_data)# 各種類型的屬性sensor_dset.attrs[units]meters/secondsensor_dset.attrs[sampling_rate]1000.0sensor_dset.attrs[calibrated]Truesensor_dset.attrs[sensor_ids][1,2,3,4,5,6,7,8,9,10]sensor_dset.attrs[date_collected]datetime.now().isoformat()sensor_dset.attrs[location]Lab Building A, Room 101sensor_dset.attrs[temperature]23.5sensor_dset.attrs[humidity]45.2# # 9. 維度標(biāo)簽# dims_groupf.create_group(with_dimensions)# 3D數(shù)據(jù)with維度標(biāo)簽volume_datanp.random.randn(50,100,100)volumedims_group.create_dataset(volume,datavolume_data)# 創(chuàng)建維度標(biāo)度z_coordsdims_group.create_dataset(z,datanp.arange(50))y_coordsdims_group.create_dataset(y,datanp.arange(100))x_coordsdims_group.create_dataset(x,datanp.arange(100))# 附加維度volume.dims[0].labelzvolume.dims[1].labelyvolume.dims[2].labelxvolume.dims[0].attach_scale(z_coords)volume.dims[1].attach_scale(y_coords)volume.dims[2].attach_scale(x_coords)# # 10. 實(shí)際應(yīng)用示例神經(jīng)網(wǎng)絡(luò)權(quán)重# nn_groupf.create_group(neural_network)# 模擬神經(jīng)網(wǎng)絡(luò)層layers{layer1:{weights:np.random.randn(784,128),biases:np.zeros(128)},layer2:{weights:np.random.randn(128,64),biases:np.zeros(64)},layer3:{weights:np.random.randn(64,10),biases:np.zeros(10)}}forlayer_name,paramsinlayers.items():layer_groupnn_group.create_group(layer_name)forparam_name,param_valueinparams.items():dsetlayer_group.create_dataset(param_name,dataparam_value)dset.attrs[trainable]Truedset.attrs[dtype]str(param_value.dtype)nn_group.attrs[architecture]feedforwardnn_group.attrs[input_size]784nn_group.attrs[output_size]10# # 11. 
時(shí)間序列數(shù)據(jù)# ts_groupf.create_group(time_series)# 生成時(shí)間戳num_samples1000timestamps[]start_timedatetime(2024,1,1,0,0,0)foriinrange(num_samples):tsstart_timetimedelta(secondsi)timestamps.append(ts.isoformat())# 存儲(chǔ)時(shí)間戳dt_typeh5py.string_dtype(encodingutf-8)ts_group.create_dataset(timestamps,datatimestamps,dtypedt_type)# 存儲(chǔ)對(duì)應(yīng)的數(shù)值ts_group.create_dataset(values,datanp.random.randn(num_samples,5))# # 12. 大數(shù)據(jù)集with優(yōu)化# large_groupf.create_group(large_data)large_datanp.random.randn(10000,1000).astype(float32)large_dsetlarge_group.create_dataset(optimized,datalarge_data,chunks(1000,100),compressiongzip,compression_opts4,shuffleTrue)large_dset.attrs[chunk_strategy]optimized for row accesslarge_dset.attrs[compression_ratio]f{large_data.nbytes/large_dset.id.get_storage_size():.2f}xprint(f文件 {filename} 創(chuàng)建完成)# 創(chuàng)建文件create_comprehensive_h5()10.2 讀取和分析綜合示例文件 讀取并分析綜合示例文件 defanalyze_h5_file(filenamecomprehensive.h5):print(f分析文件:{filename})print(*80)withh5py.File(filename,r)asf:# 顯示文件屬性print(
【文件屬性】)forkey,valueinf.attrs.items():print(f{key}:{value})# 遞歸顯示結(jié)構(gòu)print(
【文件結(jié)構(gòu)】)defprint_tree(name,obj,level0):indent *levelifisinstance(obj,h5py.Group):print(f{indent}{name}/)# 顯示組屬性iflen(obj.attrs)0:forkeyinobj.attrs.keys():print(f{indent}{key}:{obj.attrs[key]})elifisinstance(obj,h5py.Dataset):size_mbobj.nbytes/(1024*1024)storage_mbobj.id.get_storage_size()/(1024*1024)compressionobj.compressionornoneprint(f{indent}{name})print(f{indent}形狀:{obj.shape}, 類型:{obj.dtype})print(f{indent}大小:{size_mb:.2f}MB, 存儲(chǔ):{storage_mb:.2f}MB)print(f{indent}壓縮:{compression})ifobj.chunks:print(f{indent}分塊:{obj.chunks})# 顯示數(shù)據(jù)集屬性iflen(obj.attrs)0:print(f{indent}屬性:)forkey,valueinobj.attrs.items():print(f{indent}{key}:{value})f.visititems(lambdan,o:print_tree(n,o,n.count(/)))# 統(tǒng)計(jì)信息print(
【統(tǒng)計(jì)信息】)defcount_items(group):groups0datasets0total_size0defcount(name,obj):nonlocalgroups,datasets,total_sizeifisinstance(obj,h5py.Group):groups1elifisinstance(obj,h5py.Dataset):datasets1total_sizeobj.nbytes group.visititems(count)returngroups,datasets,total_size num_groups,num_datasets,total_sizecount_items(f)file_sizeos.path.getsize(filename)print(f 組數(shù)量:{num_groups})print(f 數(shù)據(jù)集數(shù)量:{num_datasets})print(f 原始數(shù)據(jù)大小:{total_size/(1024*1024):.2f}MB)print(f 文件大小:{file_size/(1024*1024):.2f}MB)print(f 總壓縮率:{total_size/file_size:.2f}x)# 示例讀取特定數(shù)據(jù)print(
【示例數(shù)據(jù)讀取】)# 讀取基礎(chǔ)類型ifbasic_types/float64inf:dataf[basic_types/float64][:10]print(f float64前10個(gè)值:{data})# 讀取結(jié)構(gòu)體ifcompound_types/peopleinf:peoplef[compound_types/people][:]print(f 人員記錄:)forpersoninpeople:print(f{person[name].decode()}: 年齡{person[age]}, 工資${person[salary]})# 讀取神經(jīng)網(wǎng)絡(luò)權(quán)重ifneural_networkinf:print(f 神經(jīng)網(wǎng)絡(luò)架構(gòu):{f[neural_network].attrs[architecture]})print(f 層:)forlayer_nameinf[neural_network].keys():layerf[neural_network][layer_name]weights_shapelayer[weights].shapeprint(f{layer_name}:{weights_shape})# 運(yùn)行分析if__name____main__:create_comprehensive_h5()analyze_h5_file()10.3 實(shí)用工具函數(shù)集 HDF5實(shí)用工具函數(shù)集 classHDF5Utils:HDF5工具類staticmethoddefget_file_info(filename):獲取文件基本信息withh5py.File(filename,r)asf:info{filename:filename,file_size_mb:os.path.getsize(filename)/(1024*1024),num_groups:0,num_datasets:0,total_data_size_mb:0}defcount(name,obj):ifisinstance(obj,h5py.Group):info[num_groups]1elifisinstance(obj,h5py.Dataset):info[num_datasets]1info[total_data_size_mb]obj.nbytes/(1024*1024)f.visititems(count)# 
文件屬性info[attributes]dict(f.attrs)returninfostaticmethoddeffind_large_datasets(filename,threshold_mb10):查找大于閾值的數(shù)據(jù)集large_datasets[]withh5py.File(filename,r)asf:defcheck_size(name,obj):ifisinstance(obj,h5py.Dataset):size_mbobj.nbytes/(1024*1024)ifsize_mbthreshold_mb:large_datasets.append({name:name,size_mb:size_mb,shape:obj.shape,dtype:str(obj.dtype)})f.visititems(check_size)returnsorted(large_datasets,keylambdax:x[size_mb],reverseTrue)staticmethoddefcopy_dataset(src_file,src_path,dst_file,dst_pathNone):復(fù)制數(shù)據(jù)集到另一個(gè)文件ifdst_pathisNone:dst_pathsrc_pathwithh5py.File(src_file,r)asf_src:withh5py.File(dst_file,a)asf_dst:f_src.copy(src_path,f_dst,namedst_path)staticmethoddefexport_to_dict(filename,path/):將HDF5導(dǎo)出為嵌套字典result{}withh5py.File(filename,r)asf:defbuild_dict(name,obj):partsname.split(/)currentresultforpartinparts[:-1]:ifpartnotincurrent:current[part]{}currentcurrent[part]ifisinstance(obj,h5py.Dataset):current[parts[-1]]obj[:]elifisinstance(obj,h5py.Group):ifparts[-1]notincurrent:current[parts[-1]]{}f.visititems(build_dict)returnresultstaticmethoddefget_compression_stats(filename):獲取壓縮統(tǒng)計(jì)信息stats[]withh5py.File(filename,r)asf:defanalyze_compression(name,obj):ifisinstance(obj,h5py.Dataset):original_sizeobj.nbytes storage_sizeobj.id.get_storage_size()stats.append({name:name,compression:obj.compressionornone,original_mb:original_size/(1024*1024),storage_mb:storage_size/(1024*1024),ratio:original_size/storage_sizeifstorage_size0else1.0})f.visititems(analyze_compression)returnstatsstaticmethoddefvalidate_file(filename):驗(yàn)證HDF5文件完整性try:withh5py.File(filename,r)asf:# 嘗試訪問(wèn)所有數(shù)據(jù)集errors[]defvalidate_dataset(name,obj):ifisinstance(obj,h5py.Dataset):try:# 嘗試讀取第一個(gè)元素ifobj.size0:_obj.flat[0]exceptExceptionase:errors.append(f{name}:{str(e)})f.visititems(validate_dataset)iferrors:returnFalse,errorselse:returnTrue,[文件驗(yàn)證通過(guò)]exceptExceptionase:returnFalse,[f無(wú)法打開(kāi)文件:{str(e)}]# 
使用示例if__name____main__:utilsHDF5Utils()# 獲取文件信息infoutils.get_file_info(comprehensive.h5)print(文件信息:,info)# 查找大數(shù)據(jù)集largeutils.find_large_datasets(comprehensive.h5,threshold_mb1)print(
大數(shù)據(jù)集:,large)# 獲取壓縮統(tǒng)計(jì)comp_statsutils.get_compression_stats(comprehensive.h5)print(
壓縮統(tǒng)計(jì):)forstatincomp_stats[:5]:# 只顯示前5個(gè)print(f{stat[name]}:{stat[ratio]:.2f}x ({stat[compression]}))# 驗(yàn)證文件valid,messagesutils.validate_file(comprehensive.h5)print(f
文件驗(yàn)證:{通過(guò)ifvalidelse失敗})formsginmessages:print(f{msg})

總結

這份指南涵蓋了HDF5的所有主要特性:
- 基礎數據類型 - 數值、字符串、布爾等
- 組結構 - 層次化組織數據
- 屬性系統 - 元數據管理
- 高級數據類型 - 復合類型、枚舉、變長數據
- 引用和鏈接 - 軟鏈接、硬鏈接、對象引用
- 壓縮和分塊 - 優化存儲和訪問
- 可擴展數據集 - 動態增長的數據
- 維度標簽 - 為數據添加物理意義
- 完整示例 - 實際應用代碼