Back to skills
extension
Category: OtherNo API key required

Data Analyzer

Data analysis tool for Excel, CSV, Word, PDF, TXT, Markdown files. Use when user needs to analyze, summarize, or compare data from multiple files. Supports f...

personAuthor: tobewinhubclawhub

Data Analyzer

Analyze and summarize data from Excel, CSV, Word, and PDF files.

Features

  • 📊 Multi-Format Input: Excel, CSV, Word, PDF, TXT, Markdown
  • 📁 Folder Scan: Analyze entire folders
  • 📈 Statistics: Sum, average, trends
  • 🔄 Data Merge: Combine multiple files
  • 📉 Visualization: Generate charts
  • 📋 Multi-Format Output: Markdown, Excel, Word, PDF
  • 🌍 Multi-Language: English, Chinese output

Trigger Conditions

  • "Analyze this folder" / "分析这个文件夹"
  • "Compare these Excel files" / "对比这些Excel"
  • "Summarize the data" / "汇总数据"
  • "data-analyzer"

Python Code

import os
import pandas as pd
from pathlib import Path
from docx import Document
import fitz  # PyMuPDF

class DataAnalyzer:
    def __init__(self, folder_path):
        self.folder = Path(folder_path)
        self.files = self._scan_files()
    
    def _scan_files(self):
        """Scan folder for supported files"""
        files = {
            'excel': [], 'csv': [], 'word': [],
            'pdf': [], 'txt': [], 'markdown': []
        }
        
        for f in self.folder.rglob('*'):
            ext = f.suffix.lower()
            if ext in ['.xlsx', '.xls']:
                files['excel'].append(str(f))
            elif ext == '.csv':
                files['csv'].append(str(f))
            elif ext == '.docx':
                files['word'].append(str(f))
            elif ext == '.pdf':
                files['pdf'].append(str(f))
            elif ext == '.txt':
                files['txt'].append(str(f))
            elif ext in ['.md', '.markdown']:
                files['markdown'].append(str(f))
        
        return files
    
    def analyze_excel(self, file_path):
        """Analyze Excel file"""
        df = pd.read_excel(file_path)
        return {'rows': len(df), 'columns': len(df.columns), 'data': df}
    
    def analyze_csv(self, file_path):
        """Analyze CSV file"""
        df = pd.read_csv(file_path)
        return {'rows': len(df), 'columns': len(df.columns), 'data': df}
    
    def analyze_word(self, file_path):
        """Analyze Word file"""
        doc = Document(file_path)
        text = '\n'.join([p.text for p in doc.paragraphs if p.text.strip()])
        return {'paragraphs': len(doc.paragraphs), 'text': text}
    
    def analyze_pdf(self, file_path):
        """Analyze PDF file"""
        doc = fitz.open(file_path)
        text = ''
        for page in doc:
            text += page.get_text()
        result = {'pages': len(doc), 'text': text}
        doc.close()
        return result
    
    def analyze_txt(self, file_path):
        """Analyze TXT file"""
        with open(file_path, 'r', encoding='utf-8') as f:
            text = f.read()
        return {'lines': len(text.split('\n')), 'text': text}
    
    def analyze_markdown(self, file_path):
        """Analyze Markdown file"""
        with open(file_path, 'r', encoding='utf-8') as f:
            text = f.read()
        return {'lines': len(text.split('\n')), 'text': text}
    
    def analyze_file(self, file_path):
        """Auto-detect and analyze any supported file"""
        ext = Path(file_path).suffix.lower()
        
        if ext in ['.xlsx', '.xls']:
            return self.analyze_excel(file_path)
        elif ext == '.csv':
            return self.analyze_csv(file_path)
        elif ext == '.docx':
            return self.analyze_word(file_path)
        elif ext == '.pdf':
            return self.analyze_pdf(file_path)
        elif ext == '.txt':
            return self.analyze_txt(file_path)
        elif ext in ['.md', '.markdown']:
            return self.analyze_markdown(file_path)
        else:
            return {'error': f'Unsupported format: {ext}'}
    
    def generate_summary(self):
        """Generate analysis summary"""
        summary = {'total_files': 0, 'file_details': []}
        
        for ftype, flist in self.files.items():
            for fpath in flist:
                try:
                    analysis = self.analyze_file(fpath)
                    summary['file_details'].append({
                        'name': os.path.basename(fpath),
                        'type': ftype,
                        'analysis': analysis
                    })
                    summary['total_files'] += 1
                except Exception as e:
                    summary['file_details'].append({
                        'name': os.path.basename(fpath),
                        'type': ftype,
                        'error': str(e)
                    })
        
        return summary

Usage

User: "Analyze all Excel files in this folder"
Agent: 使用 DataAnalyzer 扫描并分析

User: "Compare these CSV files"
Agent: 读取并对比数据

User: "Generate a data report"
Agent: 生成分析报告

Notes

  • Supports .xlsx, .csv, .docx, .pdf
  • Local processing, no data uploaded
  • Cross-platform compatible