This commit is contained in:
Josh at WLTechBlog
2025-12-16 12:26:36 -07:00
parent 051b912122
commit 34a512e278
9 changed files with 2450 additions and 0 deletions

View File

@@ -2240,6 +2240,63 @@ func (d *Daemon) handleCommand(w http.ResponseWriter, r *http.Request) {
response = Response{Success: true, Data: result}
}
case "extract-divi-structure":
tabID := cmd.Params["tab"]
timeoutStr := cmd.Params["timeout"]
// Parse timeout (default to 30 seconds)
timeout := 30
if timeoutStr != "" {
if t, err := strconv.Atoi(timeoutStr); err == nil {
timeout = t
}
}
result, err := d.extractDiviStructure(tabID, timeout)
if err != nil {
response = Response{Success: false, Error: err.Error()}
} else {
response = Response{Success: true, Data: result}
}
case "extract-divi-images":
tabID := cmd.Params["tab"]
timeoutStr := cmd.Params["timeout"]
// Parse timeout (default to 30 seconds)
timeout := 30
if timeoutStr != "" {
if t, err := strconv.Atoi(timeoutStr); err == nil {
timeout = t
}
}
result, err := d.extractDiviImages(tabID, timeout)
if err != nil {
response = Response{Success: false, Error: err.Error()}
} else {
response = Response{Success: true, Data: result}
}
case "extract-divi-content":
tabID := cmd.Params["tab"]
timeoutStr := cmd.Params["timeout"]
// Parse timeout (default to 30 seconds)
timeout := 30
if timeoutStr != "" {
if t, err := strconv.Atoi(timeoutStr); err == nil {
timeout = t
}
}
result, err := d.extractDiviContent(tabID, timeout)
if err != nil {
response = Response{Success: false, Error: err.Error()}
} else {
response = Response{Success: true, Data: result}
}
default:
d.debugLog("Unknown action: %s", cmd.Action)
response = Response{Success: false, Error: "Unknown action"}
@@ -12819,3 +12876,524 @@ func (d *Daemon) getFormAccessibilityAudit(tabID, formSelector string, timeout i
d.debugLog("Successfully generated form accessibility audit for tab: %s (found %d forms)", tabID, summary.FormsFound)
return &summary, nil
}
// DiviStructure represents the extracted Divi page structure.
// It is the Go-side shape of the object built by the script in
// extractDiviStructure: a list of sections, each holding rows, columns and
// modules, plus metadata describing the extraction itself.
type DiviStructure struct {
// URL is the page address (window.location.href) at extraction time.
URL string `json:"url"`
// Sections holds one entry per section element matched in the document.
Sections []DiviSection `json:"sections"`
// Metadata describes the extraction run rather than the page.
Metadata struct {
// ExtractionDate is the browser-side timestamp in ISO 8601 form
// (new Date().toISOString()).
ExtractionDate string `json:"extraction_date"`
// Accuracy is a fixed, human-readable estimate emitted by the script.
Accuracy string `json:"accuracy"`
// Limitations is a fixed, human-readable caveat emitted by the script.
Limitations string `json:"limitations"`
} `json:"metadata"`
}
// DiviSection represents a Divi section as inferred from the rendered DOM
// by the script in extractDiviStructure.
type DiviSection struct {
Type string `json:"type"` // regular, specialty, fullwidth
// HasParallax is true when the section carries the
// "et_pb_section_parallax" CSS class.
HasParallax bool `json:"has_parallax"`
// BackgroundColor is the section's computed background-color value.
BackgroundColor string `json:"background_color"`
// BackgroundImage is the section's computed background-image value;
// this is the raw CSS value and may be "none".
BackgroundImage string `json:"background_image"`
// BackgroundStyle is "gradient" or "image" when a background image is
// set, and empty otherwise.
BackgroundStyle string `json:"background_style"`
// Rows holds the row elements found inside this section.
Rows []DiviRow `json:"rows"`
// CSSClasses is the section element's full class list.
CSSClasses []string `json:"css_classes"`
}
// DiviRow represents a Divi row found inside a section.
type DiviRow struct {
// ColumnStructure is the comma-joined list of the row's column types,
// in document order.
ColumnStructure string `json:"column_structure"` // e.g., "1_2,1_2" for two half columns
// Columns holds the column elements found inside this row.
Columns []DiviColumn `json:"columns"`
// CSSClasses is the row element's full class list.
CSSClasses []string `json:"css_classes"`
}
// DiviColumn represents a Divi column found inside a row.
type DiviColumn struct {
// Type is the remainder of the first CSS class that starts with
// "et_pb_column_" after that prefix is removed; it is empty when the
// element has no such class.
Type string `json:"type"` // e.g., "1_2", "1_3", "4_4"
// Modules holds the module elements found inside this column.
Modules []DiviModule `json:"modules"`
// CSSClasses is the column element's full class list.
CSSClasses []string `json:"css_classes"`
}
// DiviModule represents a Divi module. It is produced both by
// extractDiviStructure (nested in columns) and by extractDiviContent (as a
// flat list); the two scripts populate Content and Attributes slightly
// differently.
type DiviModule struct {
// Type is derived from the module's CSS classes; it is "unknown" when no
// recognised module class is present.
Type string `json:"type"` // text, image, button, blurb, etc.
// Content is the module's inner HTML (text, blurb, cta) or the link text
// (button); it is empty for module types the scripts do not read content
// from.
Content string `json:"content"`
// Attributes holds type-specific string values set by the scripts, such
// as "src"/"alt" for images, "href" for buttons and "title" for
// blurbs/CTAs. Which keys are present depends on the extracting script.
Attributes map[string]string `json:"attributes"`
// CSSClasses is the module element's full class list.
CSSClasses []string `json:"css_classes"`
}
// DiviImage represents an extracted image: either an <img> element or a CSS
// background image discovered by extractDiviImages / extractDiviContent.
type DiviImage struct {
// URL is img.src for <img> elements, or the URL parsed out of the
// computed background-image value for backgrounds.
URL string `json:"url"`
// Alt is the image's alt text; always empty for background images.
Alt string `json:"alt"`
// Title is the image's title attribute; always empty for background images.
Title string `json:"title"`
// Width and Height are the natural dimensions when available, falling
// back to the element's width/height; both are 0 for background images.
Width int `json:"width"`
Height int `json:"height"`
Context string `json:"context"` // positional label set by the script, e.g., "image 3" or "background 7"
// IsBackground is true when the image came from a CSS background-image
// rather than an <img> element.
IsBackground bool `json:"is_background"`
}
// DiviContent represents extracted content: a flat list of every module and
// every <img> on the page, as built by the script in extractDiviContent.
type DiviContent struct {
// URL is the page address (window.location.href) at extraction time.
URL string `json:"url"`
// Modules lists every matched module element in document order.
Modules []DiviModule `json:"modules"`
// Images lists every <img> element that has a non-empty src.
Images []DiviImage `json:"images"`
// Metadata describes the extraction run.
Metadata struct {
// ExtractionDate is the browser-side timestamp in ISO 8601 form
// (new Date().toISOString()).
ExtractionDate string `json:"extraction_date"`
// TotalModules equals the number of entries in Modules as counted by
// the script.
TotalModules int `json:"total_modules"`
// TotalImages equals the number of entries in Images as counted by
// the script.
TotalImages int `json:"total_images"`
} `json:"metadata"`
}
// extractDiviStructure extracts an approximation of a Divi page's layout
// (sections -> rows -> columns -> modules) from the rendered HTML of a tab.
//
// The layout is inferred entirely in the browser from Divi's "et_pb_*" CSS
// classes and computed styles; the script's own metadata records that this
// is an approximation. The script's result is round-tripped through JSON to
// decode it into a DiviStructure.
//
// Parameters:
//   - tabID: identifier handed to d.getTab to resolve the page.
//   - timeout: maximum number of seconds to wait for the script. A value
//     <= 0 evaluates the script without any deadline.
//
// It returns an error when the tab cannot be resolved, the script fails or
// does not finish within the timeout, or its result cannot be decoded.
func (d *Daemon) extractDiviStructure(tabID string, timeout int) (*DiviStructure, error) {
	d.debugLog("Extracting Divi structure from tab: %s", tabID)

	page, err := d.getTab(tabID)
	if err != nil {
		return nil, err
	}

	// JavaScript to extract Divi structure
	jsCode := `
(function() {
const result = {
url: window.location.href,
sections: [],
metadata: {
extraction_date: new Date().toISOString(),
accuracy: "60-70% (approximation from CSS classes)",
limitations: "Cannot access original Divi shortcode/JSON or builder settings"
}
};
// Find all Divi sections
const sections = document.querySelectorAll('.et_pb_section, .et_section_specialty, .et_pb_fullwidth_section');
sections.forEach((section, sectionIndex) => {
const sectionData = {
type: 'regular',
has_parallax: false,
background_color: '',
background_image: '',
background_style: '',
rows: [],
css_classes: Array.from(section.classList)
};
// Determine section type from CSS classes
if (section.classList.contains('et_section_specialty')) {
sectionData.type = 'specialty';
} else if (section.classList.contains('et_pb_fullwidth_section')) {
sectionData.type = 'fullwidth';
}
// Check for parallax
if (section.classList.contains('et_pb_section_parallax')) {
sectionData.has_parallax = true;
}
// Get computed styles
const styles = window.getComputedStyle(section);
sectionData.background_color = styles.backgroundColor;
sectionData.background_image = styles.backgroundImage;
// Extract background style
if (styles.backgroundImage && styles.backgroundImage !== 'none') {
if (styles.backgroundImage.includes('gradient')) {
sectionData.background_style = 'gradient';
} else {
sectionData.background_style = 'image';
}
}
// Find all rows in this section
const rows = section.querySelectorAll('.et_pb_row, .et_pb_row_inner');
rows.forEach((row, rowIndex) => {
const rowData = {
column_structure: '',
columns: [],
css_classes: Array.from(row.classList)
};
// Find all columns in this row
const columns = row.querySelectorAll('.et_pb_column');
const columnTypes = [];
columns.forEach((column, columnIndex) => {
const columnData = {
type: '',
modules: [],
css_classes: Array.from(column.classList)
};
// Determine column type from CSS classes
const columnClass = Array.from(column.classList).find(cls => cls.startsWith('et_pb_column_'));
if (columnClass) {
columnData.type = columnClass.replace('et_pb_column_', '');
columnTypes.push(columnData.type);
}
// Find all modules in this column
const modules = column.querySelectorAll('[class*="et_pb_module"]');
modules.forEach((module, moduleIndex) => {
const moduleData = {
type: 'unknown',
content: '',
attributes: {},
css_classes: Array.from(module.classList)
};
// Determine module type from CSS classes
if (module.classList.contains('et_pb_text')) {
moduleData.type = 'text';
moduleData.content = module.innerHTML;
} else if (module.classList.contains('et_pb_image')) {
moduleData.type = 'image';
const img = module.querySelector('img');
if (img) {
moduleData.attributes.src = img.src;
moduleData.attributes.alt = img.alt || '';
}
} else if (module.classList.contains('et_pb_button')) {
moduleData.type = 'button';
const btn = module.querySelector('a');
if (btn) {
moduleData.content = btn.textContent;
moduleData.attributes.href = btn.href;
}
} else if (module.classList.contains('et_pb_blurb')) {
moduleData.type = 'blurb';
moduleData.content = module.innerHTML;
} else if (module.classList.contains('et_pb_cta')) {
moduleData.type = 'cta';
moduleData.content = module.innerHTML;
} else if (module.classList.contains('et_pb_slider')) {
moduleData.type = 'slider';
} else if (module.classList.contains('et_pb_gallery')) {
moduleData.type = 'gallery';
} else if (module.classList.contains('et_pb_video')) {
moduleData.type = 'video';
}
columnData.modules.push(moduleData);
});
rowData.columns.push(columnData);
});
// Build column structure string
rowData.column_structure = columnTypes.join(',');
sectionData.rows.push(rowData);
});
result.sections.push(sectionData);
});
return result;
})();
`

	// Execute JavaScript with timeout
	var resultData interface{}
	if timeout > 0 {
		type evalResult struct {
			data interface{}
			err  error
		}

		ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Second)
		defer cancel()

		// Buffered so the goroutine can always deliver its result and exit,
		// even after this function has already returned on timeout.
		done := make(chan evalResult, 1)
		go func() {
			data, err := page.Eval(jsCode)
			if err != nil {
				// Do not touch data here: on failure the result may be nil,
				// and a panic in this goroutine would take down the process.
				done <- evalResult{err: err}
				return
			}
			done <- evalResult{data: data.Value}
		}()

		select {
		case result := <-done:
			if result.err != nil {
				return nil, fmt.Errorf("failed to execute JavaScript: %w", result.err)
			}
			resultData = result.data
		case <-ctx.Done():
			return nil, fmt.Errorf("timeout waiting for JavaScript execution")
		}
	} else {
		data, err := page.Eval(jsCode)
		if err != nil {
			return nil, fmt.Errorf("failed to execute JavaScript: %w", err)
		}
		resultData = data.Value
	}

	// Convert result to DiviStructure by round-tripping through JSON.
	dataBytes, err := json.Marshal(resultData)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal result data: %w", err)
	}
	var structure DiviStructure
	if err := json.Unmarshal(dataBytes, &structure); err != nil {
		return nil, fmt.Errorf("failed to unmarshal Divi structure: %w", err)
	}

	d.debugLog("Successfully extracted Divi structure from tab: %s (found %d sections)", tabID, len(structure.Sections))
	return &structure, nil
}
// extractDiviImages extracts all images from a Divi page: every <img>
// element with a non-empty src, followed by every element whose computed
// background-image is a non-gradient url(...) value.
//
// The collection runs in the browser; the script's result is round-tripped
// through JSON to decode it into []DiviImage. Background images carry no
// alt/title text and report zero width and height.
//
// Parameters:
//   - tabID: identifier handed to d.getTab to resolve the page.
//   - timeout: maximum number of seconds to wait for the script. A value
//     <= 0 evaluates the script without any deadline.
//
// It returns an error when the tab cannot be resolved, the script fails or
// does not finish within the timeout, or its result cannot be decoded.
func (d *Daemon) extractDiviImages(tabID string, timeout int) ([]DiviImage, error) {
	d.debugLog("Extracting Divi images from tab: %s", tabID)

	page, err := d.getTab(tabID)
	if err != nil {
		return nil, err
	}

	// JavaScript to extract images
	jsCode := `
(function() {
const images = [];
let imageIndex = 0;
// Extract regular images
document.querySelectorAll('img').forEach((img) => {
if (img.src && img.src !== '') {
images.push({
url: img.src,
alt: img.alt || '',
title: img.title || '',
width: img.naturalWidth || img.width || 0,
height: img.naturalHeight || img.height || 0,
context: 'image ' + imageIndex,
is_background: false
});
imageIndex++;
}
});
// Extract background images
document.querySelectorAll('*').forEach((element) => {
const styles = window.getComputedStyle(element);
const bgImage = styles.backgroundImage;
if (bgImage && bgImage !== 'none' && !bgImage.includes('gradient')) {
// Extract URL from background-image
const urlMatch = bgImage.match(/url\(['"]?([^'"]+)['"]?\)/);
if (urlMatch && urlMatch[1]) {
images.push({
url: urlMatch[1],
alt: '',
title: '',
width: 0,
height: 0,
context: 'background ' + imageIndex,
is_background: true
});
imageIndex++;
}
}
});
return images;
})();
`

	// Execute JavaScript with timeout
	var resultData interface{}
	if timeout > 0 {
		type evalResult struct {
			data interface{}
			err  error
		}

		ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Second)
		defer cancel()

		// Buffered so the goroutine can always deliver its result and exit,
		// even after this function has already returned on timeout.
		done := make(chan evalResult, 1)
		go func() {
			data, err := page.Eval(jsCode)
			if err != nil {
				// Do not touch data here: on failure the result may be nil,
				// and a panic in this goroutine would take down the process.
				done <- evalResult{err: err}
				return
			}
			done <- evalResult{data: data.Value}
		}()

		select {
		case result := <-done:
			if result.err != nil {
				return nil, fmt.Errorf("failed to execute JavaScript: %w", result.err)
			}
			resultData = result.data
		case <-ctx.Done():
			return nil, fmt.Errorf("timeout waiting for JavaScript execution")
		}
	} else {
		data, err := page.Eval(jsCode)
		if err != nil {
			return nil, fmt.Errorf("failed to execute JavaScript: %w", err)
		}
		resultData = data.Value
	}

	// Convert result to []DiviImage by round-tripping through JSON.
	dataBytes, err := json.Marshal(resultData)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal result data: %w", err)
	}
	var images []DiviImage
	if err := json.Unmarshal(dataBytes, &images); err != nil {
		return nil, fmt.Errorf("failed to unmarshal Divi images: %w", err)
	}

	d.debugLog("Successfully extracted Divi images from tab: %s (found %d images)", tabID, len(images))
	return images, nil
}
// extractDiviContent extracts text content and module data from Divi pages.
//
// A script evaluated in the tab collects a flat list of every element whose
// class attribute contains "et_pb_module" (with type-specific content and
// attributes for text, image, button, blurb and cta modules) plus every
// <img> with a non-empty src, and records the totals in the metadata. The
// script's result is round-tripped through JSON to decode it into a
// DiviContent.
//
// Parameters:
//   - tabID: identifier handed to d.getTab to resolve the page.
//   - timeout: maximum number of seconds to wait for the script. A value
//     <= 0 evaluates the script without any deadline.
//
// It returns an error when the tab cannot be resolved, the script fails or
// does not finish within the timeout, or its result cannot be decoded.
func (d *Daemon) extractDiviContent(tabID string, timeout int) (*DiviContent, error) {
	d.debugLog("Extracting Divi content from tab: %s", tabID)

	page, err := d.getTab(tabID)
	if err != nil {
		return nil, err
	}

	// JavaScript to extract content
	jsCode := `
(function() {
const result = {
url: window.location.href,
modules: [],
images: [],
metadata: {
extraction_date: new Date().toISOString(),
total_modules: 0,
total_images: 0
}
};
// Extract all Divi modules
const modules = document.querySelectorAll('[class*="et_pb_module"]');
modules.forEach((module, index) => {
const moduleData = {
type: 'unknown',
content: '',
attributes: {},
css_classes: Array.from(module.classList)
};
// Determine module type and extract content
if (module.classList.contains('et_pb_text')) {
moduleData.type = 'text';
moduleData.content = module.innerHTML;
} else if (module.classList.contains('et_pb_image')) {
moduleData.type = 'image';
const img = module.querySelector('img');
if (img) {
moduleData.attributes.src = img.src;
moduleData.attributes.alt = img.alt || '';
moduleData.attributes.width = img.width.toString();
moduleData.attributes.height = img.height.toString();
}
} else if (module.classList.contains('et_pb_button')) {
moduleData.type = 'button';
const btn = module.querySelector('a');
if (btn) {
moduleData.content = btn.textContent;
moduleData.attributes.href = btn.href;
moduleData.attributes.target = btn.target || '';
}
} else if (module.classList.contains('et_pb_blurb')) {
moduleData.type = 'blurb';
const title = module.querySelector('.et_pb_blurb_content h4');
const content = module.querySelector('.et_pb_blurb_content');
if (title) moduleData.attributes.title = title.textContent;
if (content) moduleData.content = content.innerHTML;
} else if (module.classList.contains('et_pb_cta')) {
moduleData.type = 'cta';
const title = module.querySelector('.et_pb_cta_title');
const content = module.querySelector('.et_pb_cta_content');
if (title) moduleData.attributes.title = title.textContent;
if (content) moduleData.content = content.innerHTML;
}
result.modules.push(moduleData);
});
// Extract images
document.querySelectorAll('img').forEach((img, index) => {
if (img.src && img.src !== '') {
result.images.push({
url: img.src,
alt: img.alt || '',
title: img.title || '',
width: img.naturalWidth || img.width || 0,
height: img.naturalHeight || img.height || 0,
context: 'image ' + index,
is_background: false
});
}
});
result.metadata.total_modules = result.modules.length;
result.metadata.total_images = result.images.length;
return result;
})();
`

	// Execute JavaScript with timeout
	var resultData interface{}
	if timeout > 0 {
		type evalResult struct {
			data interface{}
			err  error
		}

		ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Second)
		defer cancel()

		// Buffered so the goroutine can always deliver its result and exit,
		// even after this function has already returned on timeout.
		done := make(chan evalResult, 1)
		go func() {
			data, err := page.Eval(jsCode)
			if err != nil {
				// Do not touch data here: on failure the result may be nil,
				// and a panic in this goroutine would take down the process.
				done <- evalResult{err: err}
				return
			}
			done <- evalResult{data: data.Value}
		}()

		select {
		case result := <-done:
			if result.err != nil {
				return nil, fmt.Errorf("failed to execute JavaScript: %w", result.err)
			}
			resultData = result.data
		case <-ctx.Done():
			return nil, fmt.Errorf("timeout waiting for JavaScript execution")
		}
	} else {
		data, err := page.Eval(jsCode)
		if err != nil {
			return nil, fmt.Errorf("failed to execute JavaScript: %w", err)
		}
		resultData = data.Value
	}

	// Convert result to DiviContent by round-tripping through JSON.
	dataBytes, err := json.Marshal(resultData)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal result data: %w", err)
	}
	var content DiviContent
	if err := json.Unmarshal(dataBytes, &content); err != nil {
		return nil, fmt.Errorf("failed to unmarshal Divi content: %w", err)
	}

	d.debugLog("Successfully extracted Divi content from tab: %s (found %d modules, %d images)", tabID, content.Metadata.TotalModules, content.Metadata.TotalImages)
	return &content, nil
}