Data Analyzer

Analyze and summarize data from Excel, CSV, Word, and PDF files.

Features

📊 Multi-Format Input: Excel, CSV, Word, PDF, TXT, Markdown
📁 Folder Scan: Analyze entire folders
📈 Statistics: Sum, average, trends
🔄 Data Merge: Combine multiple files
📉 Visualization: Generate charts
📋 Multi-Format Output: Markdown, Excel, Word, PDF
🌍 Multi-Language: English, Chinese output

Trigger Conditions

"Analyze this folder" / "分析这个文件夹"
"Compare these Excel files" / "对比这些Excel"
"Summarize the data" / "汇总数据"
"data-analyzer"

Python Code

import os
import pandas as pd
from pathlib import Path
from docx import Document
import fitz  # PyMuPDF

class DataAnalyzer:
    def __init__(self, folder_path):
        self.folder = Path(folder_path)
        self.files = self._scan_files()
    
    def _scan_files(self):
        """Scan folder for supported files"""
        files = {
            'excel': [], 'csv': [], 'word': [],
            'pdf': [], 'txt': [], 'markdown': []
        }
        
        for f in self.folder.rglob('*'):
            ext = f.suffix.lower()
            if ext in ['.xlsx', '.xls']:
                files['excel'].append(str(f))
            elif ext == '.csv':
                files['csv'].append(str(f))
            elif ext == '.docx':
                files['word'].append(str(f))
            elif ext == '.pdf':
                files['pdf'].append(str(f))
            elif ext == '.txt':
                files['txt'].append(str(f))
            elif ext in ['.md', '.markdown']:
                files['markdown'].append(str(f))
        
        return files
    
    def analyze_excel(self, file_path):
        """Analyze Excel file"""
        df = pd.read_excel(file_path)
        return {'rows': len(df), 'columns': len(df.columns), 'data': df}
    
    def analyze_csv(self, file_path):
        """Analyze CSV file"""
        df = pd.read_csv(file_path)
        return {'rows': len(df), 'columns': len(df.columns), 'data': df}
    
    def analyze_word(self, file_path):
        """Analyze Word file"""
        doc = Document(file_path)
        text = '\n'.join([p.text for p in doc.paragraphs if p.text.strip()])
        return {'paragraphs': len(doc.paragraphs), 'text': text}
    
    def analyze_pdf(self, file_path):
        """Analyze PDF file"""
        doc = fitz.open(file_path)
        text = ''
        for page in doc:
            text += page.get_text()
        result = {'pages': len(doc), 'text': text}
        doc.close()
        return result
    
    def analyze_txt(self, file_path):
        """Analyze TXT file"""
        with open(file_path, 'r', encoding='utf-8') as f:
            text = f.read()
        return {'lines': len(text.split('\n')), 'text': text}
    
    def analyze_markdown(self, file_path):
        """Analyze Markdown file"""
        with open(file_path, 'r', encoding='utf-8') as f:
            text = f.read()
        return {'lines': len(text.split('\n')), 'text': text}
    
    def analyze_file(self, file_path):
        """Auto-detect and analyze any supported file"""
        ext = Path(file_path).suffix.lower()
        
        if ext in ['.xlsx', '.xls']:
            return self.analyze_excel(file_path)
        elif ext == '.csv':
            return self.analyze_csv(file_path)
        elif ext == '.docx':
            return self.analyze_word(file_path)
        elif ext == '.pdf':
            return self.analyze_pdf(file_path)
        elif ext == '.txt':
            return self.analyze_txt(file_path)
        elif ext in ['.md', '.markdown']:
            return self.analyze_markdown(file_path)
        else:
            return {'error': f'Unsupported format: {ext}'}
    
    def generate_summary(self):
        """Generate analysis summary"""
        summary = {'total_files': 0, 'file_details': []}
        
        for ftype, flist in self.files.items():
            for fpath in flist:
                try:
                    analysis = self.analyze_file(fpath)
                    summary['file_details'].append({
                        'name': os.path.basename(fpath),
                        'type': ftype,
                        'analysis': analysis
                    })
                    summary['total_files'] += 1
                except Exception as e:
                    summary['file_details'].append({
                        'name': os.path.basename(fpath),
                        'type': ftype,
                        'error': str(e)
                    })
        
        return summary

Usage

User: "Analyze all Excel files in this folder"
Agent: 使用 DataAnalyzer 扫描并分析

User: "Compare these CSV files"
Agent: 读取并对比数据

User: "Generate a data report"
Agent: 生成分析报告

Notes

Supports .xlsx, .csv, .docx, .pdf
Local processing, no data uploaded
Cross-platform compatible