Skip to content

Commit

Permalink
Added linear regression to xy scatter
Browse files Browse the repository at this point in the history
  • Loading branch information
katerina7479 committed Jul 15, 2014
1 parent 3d89094 commit a6423f8
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 4 deletions.
3 changes: 3 additions & 0 deletions bin/xyscatterplot.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ def XYScatterPlotTest():

document.add_xy_scatter(data, cursor, 400, 200, "Ice Cream Sales vs Temperature", None, None, None, ("temperature", "sales"), "Auto", padding=0.11, dots=1)

cursor = PDFCursor(100, 400)
document.add_xy_scatter(data, cursor, 400, 200, "Ice Cream Sales vs Temperature", None, None, None, ("temperature", "sales"), "Auto", padding=0.11, dots=1, linear_regression=True, linear_regression_equation=True)

# Close Document
writer.close()

Expand Down
4 changes: 2 additions & 2 deletions pypdflite/pdfdocument.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,7 +498,7 @@ def add_line_graph(self, data, cursor, width, height, title=None, x_axis_limits=
self.set_fill_color(save_fill_color)

def add_xy_scatter(self, data, cursor, width, height, title=None, x_axis_limits=None, y_axis_limits=None, frequency=None, axis_titles=None, axis_labels=None, axis_font_size=None, line_colors=None,
background_style='S', border_size=1, background_border_color=None, background_fill_color=None, padding=0.1, legend=None, dots=None):
background_style='S', border_size=1, background_border_color=None, background_fill_color=None, padding=0.1, legend=None, dots=None, linear_regression=None, linear_regression_equation=None):

save_draw_color = self.draw_color
save_fill_color = self.fill_color
Expand All @@ -508,7 +508,7 @@ def add_xy_scatter(self, data, cursor, width, height, title=None, x_axis_limits=
else:
self.set_font_size(axis_font_size)

graph = PDFXYScatter(self.session, self.page, cursor, data, width, height, title, x_axis_limits, y_axis_limits, frequency, axis_titles, axis_labels, line_colors, background_style, border_size, background_border_color, background_fill_color, padding, legend, dots)
graph = PDFXYScatter(self.session, self.page, cursor, data, width, height, title, x_axis_limits, y_axis_limits, frequency, axis_titles, axis_labels, line_colors, background_style, border_size, background_border_color, background_fill_color, padding, legend, dots, linear_regression, linear_regression_equation)

self.set_font_size(save_font_size)
self.set_draw_color(save_draw_color)
Expand Down
105 changes: 103 additions & 2 deletions pypdflite/pdfobjects/pdfxyscatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@

class PDFXYScatter(PDFLineGraph):
def __init__(self, session, page, cursor, data, width, height, title, x_axis_limits, y_axis_limits, frequency, axis_titles, axis_labels, line_colors,
background_style="S", border_size=1, background_border_color=None, background_fill_color=None, padding=0.1, legend=None, dots=None):
background_style="S", border_size=1, background_border_color=None, background_fill_color=None, padding=0.1, legend=None, dots=None, linear_regression=None, linear_regression_equation=None):
self.linear_regression = linear_regression
self.linear_regression_equation = linear_regression_equation
super(PDFXYScatter, self).__init__(session, page, cursor, data, width, height, title, x_axis_limits, y_axis_limits, frequency, axis_titles, axis_labels, line_colors,
background_style, border_size, background_border_color, background_fill_color, padding, legend, dots)

Expand Down Expand Up @@ -45,15 +47,114 @@ def _draw_legend_line(self, index, series_name):
self.legend_data_start.y_plus(1.75 * line_height)

def _draw_dots(self, cursors):
    """Draw a dot at every cursor and, when enabled, a regression line.

    cursors: iterable of point cursors.  Each one is handed to PDFEllipse
        and, when ``self.linear_regression`` is truthy, to
        ``LinearRegressionLine.add_coord``.

    When ``self.linear_regression_equation`` is also truthy, the fitted
    equation text is placed near the second end point of the line.

    NOTE(review): the block nesting below is the most plausible reading of
    the statements; confirm it against the repository file.
    """
    if self.linear_regression:
        # Fresh accumulator for this call.
        self.linear_regression_line = LinearRegressionLine()
    if self.dots is not None:
        for cursor in cursors:
            # self.dots is used for both components of the size cursor --
            # presumably the x/y radii; verify against PDFEllipse.
            dot = PDFEllipse(self.session, self.page, cursor, PDFCursor(self.dots, self.dots), stroke="F")
            dot._draw()
            if self.linear_regression:
                # Points are only collected inside the dot loop, so with
                # self.dots set to None the fit would receive no points.
                self.linear_regression_line.add_coord(cursor)
    if self.linear_regression:
        self.linear_regression_line.calculate_line()
        cursor1, cursor2 = self.linear_regression_line.get_cursors()
        line = PDFLine(self.session, self.page, cursor1, cursor2)
        line._draw()
        if self.linear_regression_equation:
            text = self.linear_regression_line.get_equation()
            text_width = self.session.parent.document.font._string_width(text)
            # 1.2 x font size is used as the text height.
            text_height = self.session.parent.document.font.font_size * 1.2
            # Shift left by the text width so the text ends at cursor2.x.
            x = cursor2.x + (-text_width)
            # Take the line's y at that x and offset it by one text height.
            y = self.linear_regression_line._get_y_at_x(x) + text_height
            # No explicit draw call follows -- presumably PDFText renders
            # on construction; TODO confirm.
            text = PDFText(self.session, self.page, text, cursor=PDFCursor(x, y))


def _set_color(self, index):
    """Emit the series colour at ``line_colors[index]`` as both 'f' and 'd' types.

    Nothing happens unless the entry is a PDFColor.  For each type the
    colour string is written to the page only when
    ``session._compare_color`` reports a difference, after which a copy of
    the colour is saved on the session.

    NOTE(review): the block nesting below is the most plausible reading of
    the statements; confirm it against the repository file.
    """
    color = self.line_colors[index]
    if isinstance(color, PDFColor):
        # 'f' type -- presumably the fill colour; confirm against PDFColor.
        color._set_type('f')
        if not self.session._compare_color(color):
            self.session._out(color._get_color_string(), self.page)
            self.session._save_color(color.copy())
            # NOTE(review): this repeated call looks like the removed/added
            # pair of a diff rendering rather than intended code; confirm
            # and drop one.
            self.session._save_color(color.copy())
        # 'd' type -- presumably the draw (stroke) colour; confirm.
        color._set_type('d')
        if not self.session._compare_color(color):
            self.session._out(color._get_color_string(), self.page)
            self.session._save_color(color.copy())


class LinearRegressionLine(object):
    """Least-squares fit of ``y = slope * x + intercept`` to added points.

    Points are supplied one at a time through ``add_coord``.  Running sums
    and the bounding box of the points are maintained so the fit needs no
    second pass.  ``calculate_line`` must be called before ``get_cursors``,
    ``get_equation`` or the ``_get_*`` helpers, because it is what creates
    the ``slope`` and ``intercept`` attributes.
    """

    def __init__(self):
        # Bounding box of the points seen so far (None until the first one).
        self.min_x = None
        self.min_y = None
        self.max_x = None
        self.max_y = None
        # Running sums for the closed-form least-squares solution.
        self.x_sum = 0
        self.y_sum = 0
        self.xy_sum = 0
        self.x2_sum = 0
        # Accumulated for completeness; not read anywhere in this class.
        self.y2_sum = 0
        self.N = 0

    def add_coord(self, cursor):
        """Add one point, read from ``cursor.x`` and ``cursor.y``."""
        x = cursor.x
        y = cursor.y
        self.x_sum += x
        self.y_sum += y
        self.xy_sum += x * y
        self.x2_sum += x ** 2
        self.y2_sum += y ** 2
        self.N += 1

        if self.min_x is None or x < self.min_x:
            self.min_x = x
        if self.max_x is None or x > self.max_x:
            self.max_x = x
        if self.min_y is None or y < self.min_y:
            self.min_y = y
        if self.max_y is None or y > self.max_y:
            self.max_y = y

    def calculate_line(self):
        """Fit the line and return ``(slope, intercept)``.

        Raises:
            ZeroDivisionError: when fewer than two points were added or all
                points share the same x value, so no finite slope exists.
        """
        denominator = (self.N * self.x2_sum) - (self.x_sum ** 2)
        if denominator == 0:
            # Same exception type the bare division would raise, but with a
            # message that names the actual cause.
            raise ZeroDivisionError(
                "linear regression needs at least two points with distinct x values")
        # Force true division so integer coordinates are not floor-divided
        # on interpreters where "/" on ints truncates.
        denominator = float(denominator)
        self.slope = ((self.N * self.xy_sum) - (self.x_sum * self.y_sum)) / denominator
        self.intercept = ((self.x2_sum * self.y_sum) - (self.x_sum * self.xy_sum)) / denominator

        return self.slope, self.intercept

    def get_cursors(self):
        """Return the two end points of the fitted line as PDFCursor objects.

        The segment runs from ``min_x`` to ``max_x`` unless the start or end
        test below moves an end point onto ``min_y`` / ``max_y``.
        """
        if self.slope == 0:
            # Horizontal fit: no clipping is possible, and _get_x_at_y would
            # divide by zero.
            return (PDFCursor(self.min_x, self.intercept),
                    PDFCursor(self.max_x, self.intercept))

        # NOTE(review): the start point is chosen by comparing the
        # y-intercept (the line's value at x = 0), not its value at min_x,
        # against min_y; confirm this is the intended clipping rule.
        if self.intercept < self.min_y:
            y1 = self.min_y
            x1 = self._get_x_at_y(y1)
        else:
            x1 = self.min_x
            y1 = self._get_y_at_x(x1)

        x2 = self.max_x
        y2 = self._get_y_at_x(x2)
        if y2 > self.max_y:
            y2 = self.max_y
            x2 = self._get_x_at_y(y2)

        return PDFCursor(x1, y1), PDFCursor(x2, y2)

    def _get_x_at_y(self, y):
        """Return the x at which the fitted line reaches ``y``."""
        return (y - self.intercept) / float(self.slope)

    def _get_y_at_x(self, x):
        """Return the fitted line's y value at ``x``."""
        return self.slope * x + self.intercept

    def get_equation(self):
        """Return the fit as text, e.g. ``"y = 2.00 x - 1.00"``."""
        # Emit the sign separately so a negative intercept does not render
        # as "+ -1.00".
        sign = "-" if self.intercept < 0 else "+"
        equation = "y = %0.2f x %s %0.2f" % (self.slope, sign, abs(self.intercept))
        return equation

0 comments on commit a6423f8

Please sign in to comment.