admin 管理员组

文章数量: 1184232

012-XMP数据处理

学习目标

通过本章学习,你将掌握:

  1. XMP标准理解

    • XMP元数据架构和结构
    • XMP命名空间和属性定义
    • XMP与其他元数据标准的关系
  2. XMP数据操作

    • 读取和解析XMP数据
    • 写入和修改XMP属性
    • 处理复杂的XMP结构
  3. 高级XMP处理

    • 自定义XMP命名空间
    • XMP数据验证和清理
    • 跨应用程序XMP兼容性
  4. 实际应用场景

    • 数字资产管理系统集成
    • 创意工作流程优化
    • 元数据标准化处理

XMP数据概述

XMP简介

XMP(Extensible Metadata Platform)是Adobe开发的可扩展元数据平台,基于XML和RDF标准,提供了一种标准化的方式来创建、处理和交换数字文档和数据集的元数据。

XMP数据结构

核心命名空间定义

# File path: xmp_namespaces.py
from typing import Dict, List


class XMPNamespaces:
    """Registry of XMP namespace URIs and commonly used XMP properties.

    ``NAMESPACES`` maps a conventional prefix (``dc``, ``xmp``, ...) to its
    namespace URI.  ``COMMON_PROPERTIES`` maps a qualified property name
    (``prefix:Name``) to a dict with two keys: ``type`` (the XMP value type)
    and ``description`` (a human-readable label).
    """

    # Value types that are serialized as RDF arrays.
    _ARRAY_TYPES = ('Seq', 'Bag', 'Alt', 'Lang Alt')

    # Core namespaces: prefix -> namespace URI.
    NAMESPACES = {
        # Dublin Core
        'dc': 'http://purl.org/dc/elements/1.1/',
        # XMP Basic
        'xmp': 'http://ns.adobe.com/xap/1.0/',
        # XMP Rights
        'xmpRights': 'http://ns.adobe.com/xap/1.0/rights/',
        # XMP Media Management
        'xmpMM': 'http://ns.adobe.com/xap/1.0/mm/',
        # XMP Basic Job Ticket
        'xmpBJ': 'http://ns.adobe.com/xap/1.0/bj/',
        # XMP Paged-Text
        'xmpTPg': 'http://ns.adobe.com/xap/1.0/t/pg/',
        # XMP Dynamic Media
        'xmpDM': 'http://ns.adobe.com/xmp/1.0/DynamicMedia/',
        # Photoshop
        'photoshop': 'http://ns.adobe.com/photoshop/1.0/',
        # Camera Raw
        'crs': 'http://ns.adobe.com/camera-raw-settings/1.0/',
        # EXIF
        'exif': 'http://ns.adobe.com/exif/1.0/',
        # TIFF
        'tiff': 'http://ns.adobe.com/tiff/1.0/',
        # IPTC Core
        'Iptc4xmpCore': 'http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/',
        # IPTC Extension
        'Iptc4xmpExt': 'http://iptc.org/std/Iptc4xmpExt/2008-02-29/',
        # PLUS
        'plus': 'http://ns.useplus.org/ldf/xmp/1.0/',
        # Creative Commons
        'cc': 'http://creativecommons.org/ns#',
        # DICOM
        'DICOM': 'http://ns.adobe.com/DICOM/',
        # PDF
        'pdf': 'http://ns.adobe.com/pdf/1.3/',
        # Illustrator
        'illustrator': 'http://ns.adobe.com/illustrator/1.0/',
        # XMP identifier qualifier
        'xmpidq': 'http://ns.adobe.com/xmp/Identifier/qual/1.0/',
    }

    # Commonly used properties: qualified name -> {'type', 'description'}.
    COMMON_PROPERTIES = {
        # Dublin Core
        'dc:title': {'type': 'Lang Alt', 'description': '标题'},
        'dc:creator': {'type': 'Seq', 'description': '创建者'},
        'dc:description': {'type': 'Lang Alt', 'description': '描述'},
        'dc:subject': {'type': 'Bag', 'description': '主题/关键词'},
        'dc:rights': {'type': 'Lang Alt', 'description': '版权信息'},
        'dc:publisher': {'type': 'Bag', 'description': '发布者'},
        # The XMP specification defines dc:contributor as an unordered array.
        'dc:contributor': {'type': 'Bag', 'description': '贡献者'},
        'dc:date': {'type': 'Seq', 'description': '日期'},
        'dc:type': {'type': 'Bag', 'description': '资源类型'},
        'dc:format': {'type': 'Text', 'description': '文件格式'},
        'dc:identifier': {'type': 'Text', 'description': '唯一标识符'},
        'dc:source': {'type': 'Text', 'description': '来源'},
        'dc:language': {'type': 'Bag', 'description': '语言'},
        'dc:relation': {'type': 'Bag', 'description': '相关资源'},
        'dc:coverage': {'type': 'Text', 'description': '覆盖范围'},
        # XMP Basic
        'xmp:CreateDate': {'type': 'Date', 'description': '创建日期'},
        'xmp:ModifyDate': {'type': 'Date', 'description': '修改日期'},
        'xmp:MetadataDate': {'type': 'Date', 'description': '元数据日期'},
        'xmp:CreatorTool': {'type': 'Text', 'description': '创建工具'},
        'xmp:Rating': {'type': 'Integer', 'description': '评级'},
        'xmp:Label': {'type': 'Text', 'description': '标签'},
        'xmp:Nickname': {'type': 'Text', 'description': '昵称'},
        'xmp:Identifier': {'type': 'Bag', 'description': '标识符'},
        'xmp:Advisory': {'type': 'Bag', 'description': '建议'},
        'xmp:BaseURL': {'type': 'URL', 'description': '基础URL'},
        # XMP Rights
        'xmpRights:Marked': {'type': 'Boolean', 'description': '版权标记'},
        'xmpRights:WebStatement': {'type': 'URL', 'description': '版权声明URL'},
        'xmpRights:Certificate': {'type': 'URL', 'description': '版权证书URL'},
        'xmpRights:Owner': {'type': 'Bag', 'description': '版权所有者'},
        'xmpRights:UsageTerms': {'type': 'Lang Alt', 'description': '使用条款'},
        # Photoshop
        'photoshop:AuthorsPosition': {'type': 'Text', 'description': '作者职位'},
        'photoshop:CaptionWriter': {'type': 'Text', 'description': '说明撰写者'},
        'photoshop:Category': {'type': 'Text', 'description': '类别'},
        'photoshop:City': {'type': 'Text', 'description': '城市'},
        'photoshop:Country': {'type': 'Text', 'description': '国家'},
        'photoshop:Credit': {'type': 'Text', 'description': '信用'},
        'photoshop:DateCreated': {'type': 'Date', 'description': '创建日期'},
        'photoshop:Headline': {'type': 'Text', 'description': '标题'},
        'photoshop:Instructions': {'type': 'Text', 'description': '说明'},
        'photoshop:Source': {'type': 'Text', 'description': '来源'},
        'photoshop:State': {'type': 'Text', 'description': '州/省'},
        'photoshop:SupplementalCategories': {'type': 'Bag', 'description': '补充类别'},
        'photoshop:TransmissionReference': {'type': 'Text', 'description': '传输参考'},
        'photoshop:Urgency': {'type': 'Integer', 'description': '紧急程度'},
    }

    @classmethod
    def get_namespace_uri(cls, prefix: str) -> str:
        """Return the namespace URI registered for ``prefix``.

        Returns an empty string when the prefix is not in ``NAMESPACES``.
        """
        return cls.NAMESPACES.get(prefix, '')

    @classmethod
    def get_property_info(cls, property_name: str) -> Dict:
        """Return the ``COMMON_PROPERTIES`` entry for ``property_name``.

        Returns an empty dict when the property is not registered.
        """
        return cls.COMMON_PROPERTIES.get(property_name, {})

    @classmethod
    def is_array_property(cls, property_name: str) -> bool:
        """Return True when the property's registered type is an array type.

        Array types are ``Seq``, ``Bag``, ``Alt`` and ``Lang Alt``; unknown
        properties yield False.
        """
        prop_info = cls.get_property_info(property_name)
        return prop_info.get('type', '') in cls._ARRAY_TYPES

    @classmethod
    def get_array_type(cls, property_name: str) -> str:
        """Return the property's array type, or '' if it is not an array.

        Unknown properties also yield ''.
        """
        prop_type = cls.get_property_info(property_name).get('type', '')
        if prop_type in cls._ARRAY_TYPES:
            return prop_type
        return ''

XMP数据读取

XMP读取器实现

# File path: xmp_reader.py
import json
import re
import subprocess
import xml.etree.ElementTree as ET
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
class XMPReader:
    """Read XMP metadata from files by shelling out to ExifTool.

    Every public method returns a dict with a boolean ``success`` key; on
    failure the dict carries an ``error`` message instead of raising.
    """

    def __init__(self, exiftool_path='exiftool'):
        """Store the ExifTool executable path and create the namespace registry.

        Args:
            exiftool_path: Name or path of the ExifTool executable.
        """
        self.exiftool_path = exiftool_path
        self.namespaces = XMPNamespaces()

    def read_xmp_data(self, file_path: str) -> Dict:
        """Read the XMP tags of ``file_path`` as structured JSON.

        Args:
            file_path: Path of the file to inspect.

        Returns:
            On success: ``{'success': True, 'file_path', 'xmp_data',
            'raw_data'}`` where ``xmp_data`` holds the XMP tags with the
            leading ``XMP:`` / ``XMP-`` group marker removed and ``raw_data``
            is ExifTool's unmodified JSON object for the file.  When ExifTool
            prints an empty JSON list: ``{'success': True, 'xmp_data': {},
            'message': ...}``.  On failure: ``{'success': False, 'error': ...}``.
        """
        try:
            # -G1 makes ExifTool prefix every JSON key with its family-1
            # group (e.g. "XMP-dc:Title").  Without a -G option the keys are
            # bare tag names, so the XMP prefix filter below would never match.
            result = subprocess.run([
                self.exiftool_path,
                '-XMP:all',
                '-j',       # JSON output
                '-G1',      # include group names in the keys
                '-struct',  # keep structured values
                file_path,
            ], capture_output=True, text=True, encoding='utf-8')

            if result.returncode != 0:
                return {
                    'success': False,
                    'error': f'ExifTool执行失败: {result.stderr}'
                }

            # Parse the JSON output.
            data = json.loads(result.stdout)
            if not data:
                return {
                    'success': True,
                    'xmp_data': {},
                    'message': '文件不包含XMP数据'
                }

            file_data = data[0]

            # Keep only XMP keys.  Both accepted prefixes are four characters
            # long, so slicing strips exactly the leading group marker and
            # leaves the rest of the key untouched.
            xmp_data = {
                key[4:]: value
                for key, value in file_data.items()
                if key.startswith(('XMP:', 'XMP-'))
            }

            return {
                'success': True,
                'file_path': file_path,
                'xmp_data': xmp_data,
                'raw_data': file_data
            }
        except json.JSONDecodeError as e:
            return {
                'success': False,
                'error': f'JSON解析失败: {e}'
            }
        except Exception as e:
            # Deliberate catch-all (e.g. missing executable): this API reports
            # failures through the returned dict rather than by raising.
            return {
                'success': False,
                'error': f'读取XMP数据失败: {e}'
            }

    def extract_xmp_packet(self, file_path: str) -> Dict:
        """Extract the raw XMP packet of ``file_path`` and parse it.

        Args:
            file_path: Path of the file to inspect.

        Returns:
            On success: ``{'success': True, 'file_path', 'xmp_packet',
            'parsed_data'}`` where ``parsed_data`` is the result of
            ``_parse_xmp_packet``.  When the output is blank:
            ``{'success': True, 'xmp_packet': '', 'message': ...}``.
            On failure: ``{'success': False, 'error': ...}``.
        """
        try:
            result = subprocess.run([
                self.exiftool_path,
                '-XMP',
                '-b',  # binary (raw) output
                file_path,
            ], capture_output=True, text=True, encoding='utf-8')

            if result.returncode != 0:
                return {
                    'success': False,
                    'error': f'提取XMP数据包失败: {result.stderr}'
                }

            xmp_packet = result.stdout

            if not xmp_packet.strip():
                return {
                    'success': True,
                    'xmp_packet': '',
                    'message': '文件不包含XMP数据包'
                }

            parsed_data = self._parse_xmp_packet(xmp_packet)
            return {
                'success': True,
                'file_path': file_path,
                'xmp_packet': xmp_packet,
                'parsed_data': parsed_data
            }
        except Exception as e:
            # Deliberate catch-all: failures are reported via the result dict.
            return {
                'success': False,
                'error': f'提取XMP数据包失败: {e}'
            }

    def _parse_xmp_packet(self, xmp_packet: str) -> Dict:
        """Parse an XMP packet string into namespaces and properties.

        Args:
            xmp_packet: Raw packet text.

        Returns:
            ``{'namespaces', 'rdf_data', 'properties'}`` on success, or
            ``{'error', 'raw_packet'}`` when cleaning or parsing fails.
        """
        try:
            cleaned_packet = self._clean_xmp_packet(xmp_packet)
            root = ET.fromstring(cleaned_packet)
            namespaces = self._extract_namespaces(root)
            rdf_data = self._parse_rdf_data(root, namespaces)
            return {
                'namespaces': namespaces,
                'rdf_data': rdf_data,
                'properties': self._flatten_properties(rdf_data)
            }
        except ET.ParseError as e:
            return {
                'error': f'XML解析失败: {e}',
                'raw_packet': xmp_packet
            }
        except Exception as e:
            return {
                'error': f'解析失败: {e}',
                'raw_packet': xmp_packet
            }

    def _clean_xmp_packet(self, xmp_packet: str) -> str:
        """Strip the packet trailer so the remainder is well-formed XML.

        Args:
            xmp_packet: Raw packet text.

        Returns:
            The packet starting at the ``<?xml`` declaration when one is
            present, cut just before the ``<?xpacket end=`` trailer when one
            is present, with surrounding whitespace removed.
        """
        packet = xmp_packet

        # An XML declaration must be the very first thing in the document.
        xml_start = packet.find('<?xml')
        if xml_start != -1:
            packet = packet[xml_start:]

        # Cut at the trailer instead of right after </rdf:RDF>: cutting after
        # </rdf:RDF> would drop an enclosing </x:xmpmeta> and leave the root
        # element unclosed.  The padding whitespace before the trailer is
        # removed by strip().
        trailer_start = packet.find('<?xpacket end=')
        if trailer_start != -1:
            packet = packet[:trailer_start]

        return packet.strip()
    
    def_extract_namespaces(self, root: ET.Element)-> Dict[str,str]:"""提取命名空间"""
        namespaces ={
   
   }# 从根元素提取命名空间for key, value in root.attrib.items():if key.startswith('xmlns:'):
                prefix = key[6:]# 移除'xmlns:'
                namespaces[prefix]= value
            elif key =='xmlns':
                namespaces['']= value
        
        # 递归提取子元素的命名空间for elem in root.iter():for key, value in elem.attrib.items():if key.startswith('xmlns:'):
                    prefix = key[6:]if prefix notin namespaces:
                        namespaces[prefix]= value
        
        return namespaces
    
    def_parse_rdf_data(self, root: ET.Element, namespaces: Dict[str,str])-> Dict:"""解析RDF数据"""
        rdf_data ={
   
   }# 查找RDF:Description元素for desc in root.iter():if desc.tag.endswith('}Description')or desc.tag =='rdf:Description':# 解析属性for key, value in desc.attrib.items():ifnot key.startswith('xmlns')andnot key.startswith('rdf:'):
                        rdf_data[key]= value
                
                # 解析子元素for child in desc:
                    child_data = self._parse_element(child, namespaces)if child_data:
                        rdf_data.update(child_data)return rdf_data
    
    def_parse_element(self

本文标签: 系统 编程 数据